16
16
from difflib import SequenceMatcher
17
17
import hashlib
18
18
19
- __version__ = '1.1.7 '
19
+ __version__ = '1.1.8 '
20
20
21
21
DISPLAY_TITLE = r"""
22
22
_ _ _ _ ______
39
39
help = 'output file type(only the extension)' )
40
40
parser .add_argument ('-j' , '--filterTextFromJSON' , default = 'anonymizedTags.json' , type = str ,
41
41
help = 'A dictionary of dicom tags and their values' )
42
- parser .add_argument ('-r' , '--replaceTextFromJSON' , default = 'replaceTags.json' , type = str ,
43
- help = 'A dictionary of dicom tags and their replacement values' )
44
42
parser .add_argument ('-t' , '--threshold' , default = 0.8 , type = float ,
45
43
help = 'threshold of similarity ration between two words' )
46
- parser .add_argument ( '--pftelDB' ,
47
- dest = 'pftelDB' ,
48
- default = '' ,
49
- type = str ,
50
- help = 'optional pftel server DB path' )
44
+ parser .add_argument ('--pftelDB' ,
45
+ dest = 'pftelDB' ,
46
+ default = '' ,
47
+ type = str ,
48
+ help = 'optional pftel server DB path' )
51
49
52
50
53
51
# The main function of this *ChRIS* plugin is denoted by this ``@chris_plugin`` "decorator."
63
61
min_gpu_limit = 0 # set min_gpu_limit=1 to enable GPU
64
62
)
65
63
@pflog .tel_logTime (
66
- event = 'image_textRemove' ,
67
- log = 'Remove text from image'
64
+ event = 'image_textRemove' ,
65
+ log = 'Remove text from image'
68
66
)
69
67
def main (options : Namespace , inputdir : Path , outputdir : Path ):
70
68
"""
@@ -86,34 +84,24 @@ def main(options: Namespace, inputdir: Path, outputdir: Path):
86
84
#
87
85
# Refer to the documentation for more options, examples, and advanced uses e.g.
88
86
# adding a progress bar and parallelism.
89
- json_data_path = ''
87
+ json_data_path = ''
90
88
l_json_path = list (inputdir .glob ('**/*.json' ))
91
89
for json_path in l_json_path :
92
90
if json_path .name == options .filterTextFromJSON :
93
91
json_data_path = json_path
94
92
try :
95
93
f = open (json_data_path , 'r' )
96
94
data = json .load (f )
97
- except Exception as ex :
98
- print ("Error: " ,ex )
99
- # To be removed later
100
- replace_json_data_path = ''
101
- l_replace_json_path = list (inputdir .glob ('**/*.json' ))
102
- for json_path in l_replace_json_path :
103
- if json_path .name == options .replaceTextFromJSON :
104
- replace_json_data_path = json_path
105
- try :
106
- f = open (replace_json_data_path , 'r' )
107
- replace_data = json .load (f )
108
95
except Exception as ex :
109
96
print ("Error: " , ex )
97
+
110
98
box_list = []
111
99
mapper = PathMapper .file_mapper (inputdir , outputdir , glob = f"**/*.{ options .fileFilter } " , fail_if_empty = False )
112
100
for input_file , output_file in mapper :
113
101
# The code block below is a small and easy example of how to use a ``PathMapper``.
114
102
# It is recommended that you put your functionality in a helper function, so that
115
103
# it is more legible and can be unit tested.
116
- box_list , final_image = inpaint_text (str (input_file ), data , replace_data , box_list , options .threshold )
104
+ box_list , final_image = inpaint_text (str (input_file ), data , box_list , options .threshold )
117
105
img_rgb = cv2 .cvtColor (final_image , cv2 .COLOR_BGR2RGB )
118
106
output_file = str (output_file ).replace (options .fileFilter , options .outputType )
119
107
print (f"Saving output file as ----->{ output_file } <-----\n \n " )
@@ -126,41 +114,30 @@ def midpoint(x1, y1, x2, y2):
126
114
return x_mid , y_mid
127
115
128
116
129
- def inpaint_text (img_path , data , replace_data , box_list , similarity_threshold ):
117
+ def inpaint_text (img_path , data , box_list , similarity_threshold ):
130
118
word_list = []
131
- d_replace_text = {}
132
119
for item in data .keys ():
133
120
if item == 'PatientName' :
134
- anon_name = replace_data .get (item ).split ('^' )
135
121
real_name = data .get (item ).split ('^' )
136
122
for i in range (len (real_name )):
137
123
word_list .append (real_name [i ])
138
- d_replace_text [real_name [i ]] = anon_name [i ]
139
124
elif item == 'PatientBirthDate' :
140
125
yyyy = data .get (item )[0 :4 ]
141
126
mm = data .get (item )[4 :6 ]
142
127
dd = data .get (item )[6 :8 ]
143
- yyyy1 = replace_data .get (item )[0 :4 ]
144
- mm1 = replace_data .get (item )[4 :6 ]
145
- dd1 = replace_data .get (item )[6 :8 ]
146
128
word_list .append (f'{ mm } 1{ dd } 1{ yyyy } ' )
147
- d_replace_text [f'{ mm } 1{ dd } 1{ yyyy } ' ] = f'{ mm1 } /{ dd1 } /{ yyyy1 } '
148
129
else :
149
130
word_list .append (data .get (item ))
150
- d_replace_text [data .get (item )] = replace_data .get (item )
151
- print (d_replace_text )
152
131
# read image
153
132
print (f"Reading input file from ---->{ img_path } <----" )
154
- img = cv2 .imread (img_path )
133
+ img = cv2 .imread (img_path , cv2 .COLOR_BGR2RGB )
134
+ img = cv2 .cvtColor (img , cv2 .COLOR_BGR2RGB )
155
135
if not len (box_list ):
156
136
pipeline = keras_ocr .pipeline .Pipeline ()
157
137
# # generate (word, box) tuples
158
138
box_list = pipeline .recognize ([img ])[0 ]
159
139
160
-
161
140
mask = np .zeros (img .shape [:2 ], dtype = "uint8" )
162
- l_coordinates = []
163
- l_text = []
164
141
for box in box_list :
165
142
if (box [0 ].upper () in word_list ) or close_to_similar (box [0 ].upper (), word_list , similarity_threshold ):
166
143
print (f"Removing { box [0 ].upper ()} from image" )
@@ -175,32 +152,11 @@ def inpaint_text(img_path, data, replace_data, box_list, similarity_threshold):
175
152
thickness = int (math .sqrt ((x2 - x1 ) ** 2 + (y2 - y1 ) ** 2 ))
176
153
177
154
cv2 .line (mask , (x_mid0 , y_mid0 ), (x_mid1 , y_mi1 ), 255 ,
178
- thickness )
155
+ thickness )
179
156
img = cv2 .inpaint (img , mask , 7 , cv2 .INPAINT_NS )
180
- org = (round (x3 ),round (y3 ))
181
- l_coordinates .append (org )
182
- if box [0 ].upper () in d_replace_text :
183
- l_text .append (d_replace_text [box [0 ].upper ()])
184
- else :
185
- word = close_to_similar (box [0 ].upper (), word_list , similarity_threshold )
186
- l_text .append (d_replace_text [word ])
187
-
188
- color = (235 , 235 , 235 )
189
- print (l_text )
190
- for org ,text in zip (l_coordinates ,l_text ):
191
- #text = generate_hash_text(text)
192
- img = cv2 .putText (img , text , org , cv2 .FONT_HERSHEY_SIMPLEX , 0.7 , color , 2 )
193
157
194
158
return box_list , img
195
159
196
- def generate_hash_text (input_text : str ) -> str :
197
- """
198
- Given an input text, generate a md5 hash and
199
- return a hashed text of the original text's length
200
- """
201
- result = hashlib .md5 (input_text .encode ())
202
-
203
- return result .hexdigest ()[0 :len (input_text )].upper ()
204
160
205
161
def read_input_dicom (input_file_path ):
206
162
"""
@@ -239,6 +195,7 @@ def similar(a: str, b: str):
239
195
"""
240
196
return SequenceMatcher (None , a , b ).ratio ()
241
197
198
+
242
199
def close_to_similar (target : str , wordlist : str , similarity_threshold : float ):
243
200
for word in wordlist :
244
201
if similar (target , word ) >= similarity_threshold :
0 commit comments