15
15
import sys
16
16
from difflib import SequenceMatcher
17
17
import hashlib
18
+ import itertools
18
19
19
- __version__ = '1.2.3 '
20
+ __version__ = '1.2.4 '
20
21
21
22
DISPLAY_TITLE = r"""
22
23
_ _ _ _ ______
@@ -84,31 +85,41 @@ def main(options: Namespace, inputdir: Path, outputdir: Path):
84
85
#
85
86
# Refer to the documentation for more options, examples, and advanced uses e.g.
86
87
# adding a progress bar and parallelism.
88
+ pipeline = keras_ocr .pipeline .Pipeline ()
87
89
json_data_path = ''
88
90
data = {}
91
+ l_tag_dir_path = []
92
+ l_img_dir_path = set ()
89
93
l_json_path = list (inputdir .glob ('**/*.json' ))
90
94
for json_path in l_json_path :
91
95
if json_path .name == options .filterTextFromJSON :
92
96
json_data_path = json_path
93
- print (json_path )
94
- try :
95
- f = open (json_data_path , 'r' )
96
- data = json .load (f )
97
- except Exception as ex :
98
- print ("Error: " , ex )
99
-
100
- box_list = []
101
- mapper = PathMapper .file_mapper (inputdir , outputdir , glob = f"**/*.{ options .fileFilter } " , fail_if_empty = False )
102
- for input_file , output_file in mapper :
103
- print (data )
104
- # The code block below is a small and easy example of how to use a ``PathMapper``.
105
- # It is recommended that you put your functionality in a helper function, so that
106
- # it is more legible and can be unit tested.
107
- box_list , final_image = inpaint_text (str (input_file ), data , box_list , options .threshold )
108
- img_rgb = cv2 .cvtColor (final_image , cv2 .COLOR_BGR2RGB )
109
- output_file = str (output_file ).replace (options .fileFilter , options .outputType )
110
- print (f"Saving output file as ----->{ output_file } <-----\n \n " )
111
- cv2 .imwrite (output_file , img_rgb )
97
+ path = Path (json_data_path )
98
+ l_tag_dir_path .append (path .parent .absolute ())
99
+ l_img_path = list (inputdir .glob (f"**/*.{ options .fileFilter } " ))
100
+ for img_path in l_img_path :
101
+ path = Path (img_path )
102
+ l_img_dir_path .add (path .parent .absolute ())
103
+
104
+ result = [(x ,y ) for x ,y in itertools .product (l_tag_dir_path , l_img_dir_path ) if str (x ).split ('/' )[- 1 ] == str (y ).split ('/' )[- 1 ]]
105
+
106
+ for tag_dir ,image_dir in result :
107
+ json_data_path = os .path .join (tag_dir ,options .filterTextFromJSON )
108
+ try :
109
+ f = open (json_data_path , 'r' )
110
+ data = json .load (f )
111
+ except Exception as ex :
112
+ print ("Error: " , ex )
113
+
114
+ box_list = []
115
+ mapper = PathMapper .file_mapper (image_dir , outputdir , glob = f"**/*.{ options .fileFilter } " , fail_if_empty = False )
116
+ for input_file , output_file in mapper :
117
+
118
+ box_list , final_image = inpaint_text (str (input_file ), data , box_list , options .threshold , pipeline )
119
+ img_rgb = cv2 .cvtColor (final_image , cv2 .COLOR_BGR2RGB )
120
+ output_file = str (output_file ).replace (options .fileFilter , options .outputType )
121
+ print (f"Saving output file as ----->{ output_file } <-----\n \n " )
122
+ cv2 .imwrite (output_file , img_rgb )
112
123
113
124
114
125
def midpoint (x1 , y1 , x2 , y2 ):
@@ -117,7 +128,7 @@ def midpoint(x1, y1, x2, y2):
117
128
return x_mid , y_mid
118
129
119
130
120
- def inpaint_text (img_path , data , box_list , similarity_threshold ):
131
+ def inpaint_text (img_path , data , box_list , similarity_threshold , pipeline ):
121
132
word_list = []
122
133
for item in data .keys ():
123
134
if item == 'PatientName' :
@@ -136,7 +147,7 @@ def inpaint_text(img_path, data, box_list, similarity_threshold):
136
147
img = cv2 .imread (img_path , cv2 .COLOR_BGR2RGB )
137
148
img = cv2 .cvtColor (img , cv2 .COLOR_BGR2RGB )
138
149
if not len (box_list ):
139
- pipeline = keras_ocr . pipeline . Pipeline ()
150
+
140
151
# # generate (word, box) tuples
141
152
box_list = pipeline .recognize ([img ])[0 ]
142
153
0 commit comments