22
33# This program is licensed under the Apache License 2.0.
44# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
5+ import logging
56from typing import Any , Dict , Optional
67
78import numpy as np
1314__all__ = ["synthesize_page" , "synthesize_kie_page" ]
1415
1516
# Module-level flag: the rotation warning is logged at most once per process
ROTATION_WARNING = False


def _warn_rotation(entry: Dict[str, Any]) -> None:  # pragma: no cover
    """Log a one-time warning when an entry's geometry is made of four points.

    Args:
    ----
        entry: exported element; its "geometry" is only inspected while the warning
            has not been emitted yet
    """
    global ROTATION_WARNING
    # Nothing to do once the warning was emitted, or when the geometry is not a 4-point polygon
    if ROTATION_WARNING or len(entry["geometry"]) != 4:
        return
    logging.warning("Polygons with larger rotations will lead to inaccurate rendering")
    ROTATION_WARNING = True
26+
27+
def _synthesize(
    response: Image.Image,
    entry: Dict[str, Any],
    w: int,
    h: int,
    draw_proba: bool = False,
    font_family: Optional[str] = None,
    smoothing_factor: float = 0.75,
    min_font_size: int = 6,
    max_font_size: int = 50,
) -> Image.Image:
    """Draw a single exported entry (a word, or a line made of words) on the page image.

    Args:
    ----
        response: image to draw on; it is drawn on in place and returned
        entry: exported element with a "geometry" (two corner points or a polygon) and either a
            "value" or a list of "words"; "confidence" is read when `draw_proba` is True
        w: width used to scale the x coordinates of the geometry
        h: height used to scale the y coordinates of the geometry
        draw_proba: if True, outline the entry and print its confidence. Blue: p=1, red: p=0
        font_family: family of the font
        smoothing_factor: factor applied to the font size at each shrinking step
        min_font_size: minimum font size
        max_font_size: maximum font size

    Returns:
    -------
        the image with the entry drawn on it
    """
    # Axis-aligned bounding box of the entry; works for 2-point boxes and polygons alike
    x_coords, y_coords = zip(*entry["geometry"])
    xmin, ymin, xmax, ymax = (
        int(round(w * min(x_coords))),
        int(round(h * min(y_coords))),
        int(round(w * max(x_coords))),
        int(round(h * max(y_coords))),
    )
    word_width = xmax - xmin
    word_height = ymax - ymin

    # If lines are provided instead of words, concatenate the word entries
    if "words" in entry:
        word_text = " ".join(word["value"] for word in entry["words"])
    else:
        word_text = entry["value"]

    # Find the largest font size (capped by max_font_size) whose text fits in the box
    try:
        font_size = min(word_height, max_font_size)
        font = get_font(font_family, font_size)
        text_width, text_height = font.getbbox(word_text)[2:4]

        while (text_width > word_width or text_height > word_height) and font_size > min_font_size:
            shrunk_size = max(int(font_size * smoothing_factor), min_font_size)
            # A factor >= 1 would never shrink the font: stop instead of looping forever
            if shrunk_size >= font_size:
                break
            font_size = shrunk_size
            font = get_font(font_family, font_size)
            text_width, text_height = font.getbbox(word_text)[2:4]
    except ValueError:
        # Fall back to the smallest allowed font when the computed size is rejected
        font = get_font(font_family, min_font_size)

    # Draw the text
    d = ImageDraw.Draw(response)
    try:
        try:
            d.text((xmin, ymin), word_text, font=font, fill=(0, 0, 0), anchor="lt")
        except UnicodeEncodeError:
            # When a character cannot be encoded, use its anyascii version
            d.text((xmin, ymin), anyascii(word_text), font=font, fill=(0, 0, 0), anchor="lt")
    # Catch generic exceptions to avoid crashing the whole rendering
    except Exception:  # pragma: no cover
        logging.warning("Could not render word: %s", word_text)

    if draw_proba:
        if "confidence" in entry:
            confidence = entry["confidence"]
        else:
            words = entry["words"]
            # A line without words has no confidence to average: skip the overlay
            if not words:
                return response
            confidence = sum(word["confidence"] for word in words) / len(words)

        p = int(255 * confidence)
        color = (255 - p, 0, p)  # Red to blue gradient based on probability
        d.rectangle([(xmin, ymin), (xmax, ymax)], outline=color, width=2)

        prob_font = get_font(font_family, 20)
        prob_text = f"{confidence:.2f}"
        prob_text_width, prob_text_height = prob_font.getbbox(prob_text)[2:4]

        # Center the probability horizontally, slightly above the box (clamped to the top edge)
        prob_x_offset = (word_width - prob_text_width) // 2
        prob_y_offset = max(0, ymin - prob_text_height - 2)

        d.text((xmin + prob_x_offset, prob_y_offset), prob_text, font=prob_font, fill=color, anchor="lt")

    return response
111+
112+
def synthesize_page(
    page: Dict[str, Any],
    draw_proba: bool = False,
    font_family: Optional[str] = None,
    smoothing_factor: float = 0.95,
    min_font_size: int = 8,
    max_font_size: int = 50,
) -> np.ndarray:
    """Draw the content of the element page (OCR response) on a blank page.

    Blocks holding more than one line are rendered line by line; the other blocks are
    rendered word by word.

    Args:
    ----
        page: exported Page object to represent
        draw_proba: if True, outline each entry and print its confidence. Blue: p=1, red: p=0
        font_family: family of the font
        smoothing_factor: factor to smooth the font size
        min_font_size: minimum font size
        max_font_size: maximum font size

    Returns:
    -------
        the synthesized page
    """
    # Draw template
    h, w = page["dimensions"]
    response = Image.new("RGB", (w, h), color=(255, 255, 255))

    for block in page["blocks"]:
        lines = block["lines"]
        if not lines:
            continue

        # The check only depends on the block, so run it once per block rather than once per line
        _warn_rotation(block)  # pragma: no cover

        if len(lines) > 1:
            # If lines are provided use these to get better rendering results
            entries = lines
        else:
            # Otherwise, draw each word
            entries = [word for line in lines for word in line["words"]]

        for entry in entries:
            response = _synthesize(
                response=response,
                entry=entry,
                w=w,
                h=h,
                draw_proba=draw_proba,
                font_family=font_family,
                smoothing_factor=smoothing_factor,
                min_font_size=min_font_size,
                max_font_size=max_font_size,
            )

    return np.array(response, dtype=np.uint8)
71174
72175
73176def synthesize_kie_page (
@@ -81,46 +184,29 @@ def synthesize_kie_page(
81184 ----
82185 page: exported Page object to represent
83186 draw_proba: if True, draw words in colors to represent confidence. Blue: p=1, red: p=0
84- font_size: size of the font, default font = 13
85187 font_family: family of the font
188+ smoothing_factor: factor to smooth the font size
189+ min_font_size: minimum font size
190+ max_font_size: maximum font size
86191
87192 Returns:
88193 -------
89194 the synthesized page
90195 """
91196 # Draw template
92197 h , w = page ["dimensions" ]
93- response = 255 * np . ones (( h , w , 3 ), dtype = np . int32 )
198+ response = Image . new ( "RGB" , ( w , h ), color = ( 255 , 255 , 255 ) )
94199
95200 # Draw each word
96201 for predictions in page ["predictions" ].values ():
97202 for prediction in predictions :
98- # Get aboslute word geometry
99- (xmin , ymin ), (xmax , ymax ) = prediction ["geometry" ]
100- xmin , xmax = int (round (w * xmin )), int (round (w * xmax ))
101- ymin , ymax = int (round (h * ymin )), int (round (h * ymax ))
102-
103- # White drawing context adapted to font size, 0.75 factor to convert pts --> pix
104- font = get_font (font_family , int (0.75 * (ymax - ymin )))
105- img = Image .new ("RGB" , (xmax - xmin , ymax - ymin ), color = (255 , 255 , 255 ))
106- d = ImageDraw .Draw (img )
107- # Draw in black the value of the word
108- try :
109- d .text ((0 , 0 ), prediction ["value" ], font = font , fill = (0 , 0 , 0 ))
110- except UnicodeEncodeError :
111- # When character cannot be encoded, use its anyascii version
112- d .text ((0 , 0 ), anyascii (prediction ["value" ]), font = font , fill = (0 , 0 , 0 ))
113-
114- # Colorize if draw_proba
115- if draw_proba :
116- p = int (255 * prediction ["confidence" ])
117- mask = np .where (np .array (img ) == 0 , 1 , 0 )
118- proba : np .ndarray = np .array ([255 - p , 0 , p ])
119- color = mask * proba [np .newaxis , np .newaxis , :]
120- white_mask = 255 * (1 - mask )
121- img = color + white_mask
122-
123- # Write to response page
124- response [ymin :ymax , xmin :xmax , :] = np .array (img )
125-
126- return response
203+ _warn_rotation (prediction ) # pragma: no cover
204+ response = _synthesize (
205+ response = response ,
206+ entry = prediction ,
207+ w = w ,
208+ h = h ,
209+ draw_proba = draw_proba ,
210+ font_family = font_family ,
211+ )
212+ return np .array (response , dtype = np .uint8 )
0 commit comments