1+ from typing import Any , Dict , List , Union
2+ from io import BytesIO
3+ from PIL import Image
4+ import numpy as np
5+ from dataclasses import dataclass , field
6+ from cachetools import cachedmethod , TTLCache
7+ from cachetools .keys import hashkey
8+ from operator import attrgetter
9+ import easyocr
10+
11+ from extract_thinker .document_loader .cached_document_loader import CachedDocumentLoader
12+
13+
@dataclass
class EasyOCRConfig:
    """Settings bundle for the EasyOCR-backed document loader.

    Creating an instance validates the settings and then immediately builds
    an ``easyocr.Reader`` from them; the reader is stored on the instance as
    the ``reader`` attribute.

    Args:
        lang_list: Language codes passed to EasyOCR. Must not be empty.
            Defaults to ['en'].
        gpu: Whether to use GPU acceleration. Defaults to True.
        download_enabled: Whether to download models automatically.
            Defaults to True.
        cache_ttl: Time-to-live for cache in seconds. Must be greater than
            zero. Defaults to 300.

    Raises:
        ValueError: If ``lang_list`` is empty or ``cache_ttl`` is not positive.
    """

    lang_list: List[str] = field(default_factory=lambda: ['en'])
    gpu: bool = True
    download_enabled: bool = True
    cache_ttl: int = 300

    def __post_init__(self):
        """Validate the settings, then construct the EasyOCR reader."""
        # Validation runs before the reader is built so that an invalid
        # configuration fails without constructing ``easyocr.Reader``.
        has_languages = bool(self.lang_list)
        if not has_languages:
            raise ValueError("lang_list must contain at least one language code.")

        if self.cache_ttl <= 0:
            raise ValueError("cache_ttl must be positive.")

        reader_options = {
            "lang_list": self.lang_list,
            "gpu": self.gpu,
            "download_enabled": self.download_enabled,
        }
        self.reader = easyocr.Reader(**reader_options)
41+
42+
class DocumentLoaderEasyOCR(CachedDocumentLoader):
    """Document loader that runs EasyOCR over a single image.

    Sources may be a file path (``str``) or an in-memory ``BytesIO`` stream.
    OCR results are memoised in a per-instance ``TTLCache`` (128 entries,
    lifetime taken from ``config.cache_ttl``). Vision mode is not supported.
    """

    SUPPORTED_FORMATS = ["png", "jpg", "jpeg", "tiff", "tif", "webp"]

    def __init__(self, config: EasyOCRConfig):
        """Initialize the EasyOCR document loader.

        Args:
            config: Configuration object for EasyOCR settings. Its ``reader``
                performs the OCR and its ``cache_ttl`` sets the cache lifetime.
        """
        super().__init__()
        self.config = config
        self.cache = TTLCache(maxsize=128, ttl=self.config.cache_ttl)
        self.vision_mode = False

    def can_handle(self, source: Union[str, BytesIO]) -> bool:
        """Check if the loader can handle the given source.

        Args:
            source: Path to a file or BytesIO stream

        Returns:
            bool: True for any BytesIO stream, or for a string whose text
            after the last '.' (case-insensitive) is in SUPPORTED_FORMATS;
            False otherwise.
        """
        # Streams carry no name to inspect, so they are always accepted.
        if isinstance(source, BytesIO):
            return True
        # Paths are accepted based on their extension only.
        if isinstance(source, str) and '.' in source:
            ext = source.split('.')[-1].lower()
            return ext in self.SUPPORTED_FORMATS
        return False

    def _cache_key(self, source):
        """Build the cache key used by ``load`` for ``source``.

        Paths are keyed by the path string. Streams are keyed by a SHA-256
        digest of their full content, so two different streams never share a
        cache entry while identical content still hits the cache.

        Raises:
            ValueError: If ``source`` is neither ``str`` nor ``BytesIO``. This
                is raised here so an unsupported source can never be answered
                from the cache.
        """
        if isinstance(source, str):
            return hashkey(source)
        if isinstance(source, BytesIO):
            # Local import: hashlib is not among the module-level imports.
            import hashlib

            digest = hashlib.sha256(source.getvalue()).hexdigest()
            # The leading tag makes this a 2-tuple key, which cannot collide
            # with the 1-tuple keys used for paths.
            return hashkey("bytes", digest)
        raise ValueError("Unsupported source type. Expected str or BytesIO.")

    @cachedmethod(cache=attrgetter('cache'), key=_cache_key)
    def load(self, source: Union[str, BytesIO]) -> List[List[Dict[str, Any]]]:
        """Load and process an image (file path or BytesIO) using EasyOCR.

        Results are cached per instance. A path is cached under the path
        string, so a repeated call within the TTL returns the cached result
        without re-reading the file. A stream is cached under a digest of its
        content.

        Args:
            source: Image file path or in-memory image stream (BytesIO)

        Returns:
            A list containing one page; the page is a list of OCR results.
            Each OCR result is a dictionary with:
                - text: The extracted text
                - probability: Confidence score
                - bbox: Bounding box coordinates

        Raises:
            ValueError: If ``source`` is neither ``str`` nor ``BytesIO``.
        """
        if isinstance(source, BytesIO):
            # Always decode from the start, wherever the caller left the cursor.
            source.seek(0)
        elif not isinstance(source, str):
            raise ValueError("Unsupported source type. Expected str or BytesIO.")

        # Manage the *opened* image so its file handle is released on exit;
        # convert() returns a separate in-memory copy.
        with Image.open(source) as opened:
            image_array = np.array(opened.convert("RGB"))

        ocr_result = self.config.reader.readtext(image_array)
        # Each readtext entry is unpacked as a (bbox, text, probability) triple.
        page_data = [
            {"bbox": bbox, "text": text, "probability": prob}
            for bbox, text, prob in ocr_result
        ]
        return [page_data]

    def can_handle_vision(self, source: Union[str, BytesIO]) -> bool:
        """EasyOCR currently doesn't support vision mode in this loader."""
        return False

    def set_vision_mode(self, enabled: bool = True):
        """Reject attempts to enable vision mode; disabling is a no-op.

        Args:
            enabled: Requested vision-mode state.

        Raises:
            ValueError: If ``enabled`` is true.
        """
        if enabled:
            raise ValueError("Vision mode is not supported in EasyOCR loader.")
121+
0 commit comments