Skip to content

Commit 102ddcd

Browse files
committed
locally test bulk processing
1 parent 53b9468 commit 102ddcd

File tree

1 file changed

+33
-22
lines changed

1 file changed

+33
-22
lines changed

image_textRemove.py

+33-22
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,9 @@
1515
import sys
1616
from difflib import SequenceMatcher
1717
import hashlib
18+
import itertools
1819

19-
__version__ = '1.2.3'
20+
__version__ = '1.2.4'
2021

2122
DISPLAY_TITLE = r"""
2223
_ _ _ _ ______
@@ -84,31 +85,41 @@ def main(options: Namespace, inputdir: Path, outputdir: Path):
8485
#
8586
# Refer to the documentation for more options, examples, and advanced uses e.g.
8687
# adding a progress bar and parallelism.
88+
pipeline = keras_ocr.pipeline.Pipeline()
8789
json_data_path = ''
8890
data = {}
91+
l_tag_dir_path = []
92+
l_img_dir_path = set()
8993
l_json_path = list(inputdir.glob('**/*.json'))
9094
for json_path in l_json_path:
9195
if json_path.name == options.filterTextFromJSON:
9296
json_data_path = json_path
93-
print(json_path)
94-
try:
95-
f = open(json_data_path, 'r')
96-
data = json.load(f)
97-
except Exception as ex:
98-
print("Error: ", ex)
99-
100-
box_list = []
101-
mapper = PathMapper.file_mapper(inputdir, outputdir, glob=f"**/*.{options.fileFilter}", fail_if_empty=False)
102-
for input_file, output_file in mapper:
103-
print(data)
104-
# The code block below is a small and easy example of how to use a ``PathMapper``.
105-
# It is recommended that you put your functionality in a helper function, so that
106-
# it is more legible and can be unit tested.
107-
box_list, final_image = inpaint_text(str(input_file), data, box_list, options.threshold)
108-
img_rgb = cv2.cvtColor(final_image, cv2.COLOR_BGR2RGB)
109-
output_file = str(output_file).replace(options.fileFilter, options.outputType)
110-
print(f"Saving output file as ----->{output_file}<-----\n\n")
111-
cv2.imwrite(output_file, img_rgb)
97+
path = Path(json_data_path)
98+
l_tag_dir_path.append(path.parent.absolute())
99+
l_img_path = list(inputdir.glob(f"**/*.{options.fileFilter}"))
100+
for img_path in l_img_path:
101+
path = Path(img_path)
102+
l_img_dir_path.add(path.parent.absolute())
103+
104+
result = [(x,y) for x,y in itertools.product(l_tag_dir_path, l_img_dir_path) if str(x).split('/')[-1] == str(y).split('/')[-1]]
105+
106+
for tag_dir,image_dir in result:
107+
json_data_path = os.path.join(tag_dir,options.filterTextFromJSON)
108+
try:
109+
f = open(json_data_path, 'r')
110+
data = json.load(f)
111+
except Exception as ex:
112+
print("Error: ", ex)
113+
114+
box_list = []
115+
mapper = PathMapper.file_mapper(image_dir, outputdir, glob=f"**/*.{options.fileFilter}", fail_if_empty=False)
116+
for input_file, output_file in mapper:
117+
118+
box_list, final_image = inpaint_text(str(input_file), data, box_list, options.threshold, pipeline)
119+
img_rgb = cv2.cvtColor(final_image, cv2.COLOR_BGR2RGB)
120+
output_file = str(output_file).replace(options.fileFilter, options.outputType)
121+
print(f"Saving output file as ----->{output_file}<-----\n\n")
122+
cv2.imwrite(output_file, img_rgb)
112123

113124

114125
def midpoint(x1, y1, x2, y2):
@@ -117,7 +128,7 @@ def midpoint(x1, y1, x2, y2):
117128
return x_mid, y_mid
118129

119130

120-
def inpaint_text(img_path, data, box_list, similarity_threshold):
131+
def inpaint_text(img_path, data, box_list, similarity_threshold, pipeline):
121132
word_list = []
122133
for item in data.keys():
123134
if item == 'PatientName':
@@ -136,7 +147,7 @@ def inpaint_text(img_path, data, box_list, similarity_threshold):
136147
img = cv2.imread(img_path, cv2.COLOR_BGR2RGB)
137148
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
138149
if not len(box_list):
139-
pipeline = keras_ocr.pipeline.Pipeline()
150+
140151
# # generate (word, box) tuples
141152
box_list = pipeline.recognize([img])[0]
142153

0 commit comments

Comments
 (0)