Skip to content

Commit dd32ab1

Browse files
authored
fix: make paddleocr install optional (#75)
* Pin protobuf to avoid error
* Add pin note
* Bump version and release
* Stop importing paddleocr unless needed
* Make paddleocr optional
* Update reqs
1 parent db173d0 commit dd32ab1

File tree

5 files changed

+30
-7
lines changed

5 files changed

+30
-7
lines changed

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
## 0.3.1
2+
3+
* Pin protobuf version to avoid errors
4+
* Make paddleocr an extra again
5+
16
## 0.3.0
27

38
* Fix for text block detection

requirements/dev.txt

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ ipykernel==6.22.0
6767
# nbclassic
6868
# notebook
6969
# qtconsole
70-
ipython==8.11.0
70+
ipython==8.12.0
7171
# via
7272
# -r requirements/dev.in
7373
# ipykernel
@@ -143,7 +143,7 @@ matplotlib-inline==0.1.6
143143
# ipython
144144
mistune==2.0.5
145145
# via nbconvert
146-
nbclassic==0.5.3
146+
nbclassic==0.5.4
147147
# via notebook
148148
nbclient==0.7.2
149149
# via nbconvert
@@ -304,6 +304,8 @@ traitlets==5.9.0
304304
# nbformat
305305
# notebook
306306
# qtconsole
307+
typing-extensions==4.5.0
308+
# via ipython
307309
uri-template==1.2.0
308310
# via jsonschema
309311
wcwidth==0.2.6

setup.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,22 @@
5959
"opencv-python==4.6.0.66",
6060
"onnxruntime",
6161
"transformers",
62-
'unstructured.PaddleOCR ; platform_machine=="x86_64"',
6362
],
63+
extras_require={
64+
"tables": [
65+
'unstructured.PaddleOCR ; platform_machine=="x86_64"',
66+
# NOTE(crag): workaround issue for error output below
67+
# ERROR test_unstructured/partition/test_common.py - TypeError: Descriptors cannot not
68+
# be created directly.
69+
# If this call came from a _pb2.py file, your generated code is out of date and must be
70+
# regenerated with protoc >= 3.19.0.
71+
# If you cannot immediately regenerate your protos, some other possible workarounds are:
72+
# 1. Downgrade the protobuf package to 3.20.x or lower.
73+
# 2. Set PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python (but this will use pure-Python
74+
# parsing and will be much slower).
75+
"protobuf<3.21",
76+
# NOTE(alan): Pin to get around error: undefined symbol: _dl_sym, version GLIBC_PRIVATE
77+
"paddlepaddle>=2.4",
78+
]
79+
},
6480
)

unstructured_inference/__version__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.3.0" # pragma: no cover
1+
__version__ = "0.3.1" # pragma: no cover

unstructured_inference/models/paddle_ocr.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
1-
from unstructured_paddleocr import PaddleOCR
2-
3-
paddle_ocr: PaddleOCR = None
1+
paddle_ocr = None # type: ignore
42

53

64
def load_agent():
75
"""Loads the PaddleOCR agent as a global variable to ensure that we only load it once."""
86

7+
from unstructured_paddleocr import PaddleOCR
8+
99
global paddle_ocr
1010
paddle_ocr = PaddleOCR(use_angle_cls=True, lang="en", mkl_dnn=True, show_log=False)
1111

0 commit comments

Comments
 (0)