Skip to content

Commit 0e44926

Browse files
committed
Do not return None
1 parent c1e9b8e commit 0e44926

File tree

1 file changed

+2
-4
lines changed

1 file changed

+2
-4
lines changed

unstructured/partition/utils/ocr_models/tesseract_ocr.py

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -136,14 +136,12 @@ def hocr_to_dataframe(
136136
return ocr_df
137137

138138
@staticmethod
139-
def extract_word_from_hocr(
140-
word: Tag, character_confidence_threshold: float = 0.0
141-
) -> str | None:
139+
def extract_word_from_hocr(word: Tag, character_confidence_threshold: float = 0.0) -> str:
142140
"""Extracts a word from an hOCR word tag, filtering out characters with low confidence."""
143141

144142
character_spans = word.find_all("span", class_="ocrx_cinfo")
145143
if len(character_spans) == 0:
146-
return None
144+
return ""
147145

148146
word_text = ""
149147
for character_span in character_spans:

0 commit comments

Comments
 (0)