Skip to content

Commit 8a14ab3

Browse files
authored
Merge pull request #123 from snexus/fix-image-parsing
Fix image processing edge cases and more robust handling of Gemini API responses
2 parents daac0ac + 2489df2 commit 8a14ab3

File tree

1 file changed

+11
-3
lines changed

1 file changed

+11
-3
lines changed

src/llmsearch/parsers/images/generic.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,11 @@ def extract_images(self) -> List[PDFImage]:
9191
for img in page.get_images():
9292
xref = img[0]
9393
data = doc.extract_image(xref)
94-
out_fn = self._resize_and_save_image(data, page.number, xref)
94+
try:
95+
out_fn = self._resize_and_save_image(data, page.number, xref)
96+
except Exception as ex:
97+
logger.error(f"An exception occured when opening the image: {str(ex)}")
98+
out_fn = None
9599
if out_fn:
96100
out_images.append(
97101
PDFImage(
@@ -131,7 +135,7 @@ def _resize_and_save_image(
131135
/ f"{self.pdf_fn.stem}_page_{page_num}_xref_{xref_num}.png"
132136
)
133137
logger.debug(f"Saving file: {out_fn}")
134-
img.save(out_fn)
138+
img.convert("RGB").save(out_fn)
135139

136140
return out_fn
137141

@@ -160,13 +164,17 @@ def log_attempt_number(retry_state):
160164
logger.error(
161165
f"API call attempt {retry_state.attempt_number} failed with error: {error_message}. Retrying..."
162166
)
163-
# logger.error(f"API call attempt failed. Retrying: {retry_state.attempt_number}...")
164167

165168

169+
def on_retry_failed(retry_state):
170+
logger.error("API calls failed for maximum number of retries. Skipping image processing for this graph.")
171+
return retry_state.args[0]
172+
166173
@retry(
167174
wait=wait_random_exponential(min=5, max=60),
168175
stop=stop_after_attempt(6),
169176
after=log_attempt_number,
177+
retry_error_callback= on_retry_failed,
170178
)
171179
def analyze_single_image(
172180
pdf_image: PDFImage, image_analyzer: Callable, i: int

0 commit comments

Comments
 (0)