Skip to content

Commit d357490

Browse files
authored
Merge pull request #152 from enoch3712/151-tesseract-only-extract-from-png-images-and-not-jpeg
151 tesseract only extract from png images and not jpeg
2 parents f6b098d + 64c9db1 commit d357490

File tree

2 files changed

+2
-2
lines changed

2 files changed

+2
-2
lines changed

extract_thinker/document_loader/document_loader_tesseract.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
class DocumentLoaderTesseract(CachedDocumentLoader):
1616
"""Document loader for OCR using Tesseract."""
1717

18-
SUPPORTED_FORMATS = ["jpeg", "png", "bmp", "tiff", "pdf"]
18+
SUPPORTED_FORMATS = ["jpeg", "png", "bmp", "tiff", "pdf", "jpg"]
1919

2020
def __init__(self, tesseract_cmd, isContainer=False, content=None, cache_ttl=300):
2121
super().__init__(content, cache_ttl)

pyproject.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "extract_thinker"
3-
version = "0.0.30"
3+
version = "0.0.31"
44
description = "Library to extract data from files and documents agnostically using LLMs"
55
authors = ["Júlio Almeida <[email protected]>"]
66
readme = "README.md"

0 commit comments

Comments
 (0)