Skip to content

Commit 4194a07

Browse files
authored
build(deps): replace pillow-heif with pi-heif (#3571)
### Summary Closes #2664 and replaces `pillow-heif` with `pi-heif` due to more permissive licensing on the binary wheel for `pi-heif`.
1 parent ddba928 commit 4194a07

26 files changed

+35
-93
lines changed

CHANGELOG.md

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
## 0.15.8-dev5
1+
## 0.15.8
22

33
### Enhancements
44

@@ -10,6 +10,7 @@
1010

1111
### Fixes
1212

13+
* **Replace `pillow-heif` with `pi-heif`**. Replaces `pillow-heif` with `pi-heif` due to more permissive licensing on the wheel for `pi-heif`.
1314
* **Minify text_as_html from DOCX.** Previously `.metadata.text_as_html` for DOCX tables was "bloated" with whitespace and noise elements introduced by `tabulate` that produced over-chunking and lower "semantic density" of elements. Reduce HTML to minimum character count while preserving all text.
1415
* **Fall back to filename extension-based file-type detection for unidentified OLE files.** Resolves a problem where a DOC file that could not be detected as such by `filetype` was incorrectly identified as an MSG file.
1516

requirements/base.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ h11==0.14.0
4141
# via httpcore
4242
httpcore==1.0.5
4343
# via httpx
44-
httpx==0.27.0
44+
httpx==0.27.2
4545
# via unstructured-client
4646
idna==3.8
4747
# via

requirements/dev.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -354,7 +354,7 @@ wheel==0.44.0
354354
# pip-tools
355355
widgetsnbextension==4.0.13
356356
# via ipywidgets
357-
zipp==3.20.0
357+
zipp==3.20.1
358358
# via importlib-metadata
359359

360360
# The following packages are considered to be unsafe in a requirements file:

requirements/extra-markdown.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,5 +8,5 @@ importlib-metadata==8.4.0
88
# via markdown
99
markdown==3.7
1010
# via -r ./extra-markdown.in
11-
zipp==3.20.0
11+
zipp==3.20.1
1212
# via importlib-metadata

requirements/extra-paddleocr.txt

+2-2
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ httpcore==1.0.5
4343
# via
4444
# -c ./base.txt
4545
# httpx
46-
httpx==0.27.0
46+
httpx==0.27.2
4747
# via
4848
# -c ./base.txt
4949
# paddlepaddle
@@ -176,5 +176,5 @@ urllib3==1.26.19
176176
# -c ././deps/constraints.txt
177177
# -c ./base.txt
178178
# requests
179-
zipp==3.20.0
179+
zipp==3.20.1
180180
# via importlib-resources

requirements/extra-pdf-image.in

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ onnx
55
pdf2image
66
pdfminer.six
77
pikepdf
8-
pillow_heif
8+
pi_heif
99
pypdf
1010
google-cloud-vision
1111
effdet

requirements/extra-pdf-image.txt

+5-5
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ google-auth==2.34.0
5353
# google-cloud-vision
5454
google-cloud-vision==3.7.4
5555
# via -r ./extra-pdf-image.in
56-
googleapis-common-protos==1.63.2
56+
googleapis-common-protos==1.64.0
5757
# via
5858
# google-api-core
5959
# grpcio-status
@@ -147,6 +147,8 @@ pdfminer-six==20231228
147147
# pdfplumber
148148
pdfplumber==0.11.4
149149
# via layoutparser
150+
pi-heif==0.18.0
151+
# via -r ./extra-pdf-image.in
150152
pikepdf==9.2.0
151153
# via -r ./extra-pdf-image.in
152154
pillow==10.4.0
@@ -155,12 +157,10 @@ pillow==10.4.0
155157
# matplotlib
156158
# pdf2image
157159
# pdfplumber
160+
# pi-heif
158161
# pikepdf
159-
# pillow-heif
160162
# torchvision
161163
# unstructured-pytesseract
162-
pillow-heif==0.18.0
163-
# via -r ./extra-pdf-image.in
164164
portalocker==2.10.1
165165
# via iopath
166166
proto-plus==1.24.0
@@ -293,5 +293,5 @@ wrapt==1.16.0
293293
# -c ././deps/constraints.txt
294294
# -c ./base.txt
295295
# deprecated
296-
zipp==3.20.0
296+
zipp==3.20.1
297297
# via importlib-resources

requirements/ingest/astradb.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ httpcore==1.0.5
5151
# via
5252
# -c ./ingest/../base.txt
5353
# httpx
54-
httpx[http2]==0.27.0
54+
httpx[http2]==0.27.2
5555
# via
5656
# -c ./ingest/../base.txt
5757
# astrapy

requirements/ingest/chroma.txt

+2-2
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ fsspec==2024.6.1
6060
# via huggingface-hub
6161
google-auth==2.34.0
6262
# via kubernetes
63-
googleapis-common-protos==1.63.2
63+
googleapis-common-protos==1.64.0
6464
# via opentelemetry-exporter-otlp-proto-grpc
6565
grpcio==1.66.0
6666
# via
@@ -245,7 +245,7 @@ wrapt==1.16.0
245245
# -c ./ingest/../deps/constraints.txt
246246
# deprecated
247247
# opentelemetry-instrumentation
248-
zipp==3.20.0
248+
zipp==3.20.1
249249
# via
250250
# importlib-metadata
251251
# importlib-resources

requirements/ingest/clarifai.txt

+2-2
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ clarifai-grpc==10.7.1
1919
# via clarifai
2020
contextlib2==21.6.0
2121
# via schema
22-
googleapis-common-protos==1.63.2
22+
googleapis-common-protos==1.64.0
2323
# via clarifai-grpc
2424
grpcio==1.66.0
2525
# via
@@ -61,7 +61,7 @@ requests==2.32.3
6161
# via
6262
# -c ./ingest/../base.txt
6363
# clarifai-grpc
64-
rich==13.7.1
64+
rich==13.8.0
6565
# via clarifai
6666
schema==0.7.5
6767
# via clarifai

requirements/ingest/databricks-volumes.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ charset-normalizer==3.3.2
1515
# via
1616
# -c ./ingest/../base.txt
1717
# requests
18-
databricks-sdk==0.30.0
18+
databricks-sdk==0.31.0
1919
# via -r ./ingest/databricks-volumes.in
2020
google-auth==2.34.0
2121
# via databricks-sdk

requirements/ingest/embed-aws-bedrock.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ httpcore==1.0.5
6262
# via
6363
# -c ./ingest/../base.txt
6464
# httpx
65-
httpx==0.27.0
65+
httpx==0.27.2
6666
# via
6767
# -c ./ingest/../base.txt
6868
# langsmith

requirements/ingest/embed-huggingface.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ httpcore==1.0.5
4242
# via
4343
# -c ./ingest/../base.txt
4444
# httpx
45-
httpx==0.27.0
45+
httpx==0.27.2
4646
# via
4747
# -c ./ingest/../base.txt
4848
# langsmith

requirements/ingest/embed-octoai.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ httpcore==1.0.5
3636
# via
3737
# -c ./ingest/../base.txt
3838
# httpx
39-
httpx==0.27.0
39+
httpx==0.27.2
4040
# via
4141
# -c ./ingest/../base.txt
4242
# openai

requirements/ingest/embed-openai.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ httpcore==1.0.5
3636
# via
3737
# -c ./ingest/../base.txt
3838
# httpx
39-
httpx==0.27.0
39+
httpx==0.27.2
4040
# via
4141
# -c ./ingest/../base.txt
4242
# langsmith

requirements/ingest/embed-vertexai.txt

+2-2
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ google-resumable-media==2.7.2
8888
# via
8989
# google-cloud-bigquery
9090
# google-cloud-storage
91-
googleapis-common-protos[grpc]==1.63.2
91+
googleapis-common-protos[grpc]==1.64.0
9292
# via
9393
# google-api-core
9494
# grpc-google-iam-v1
@@ -112,7 +112,7 @@ httpcore==1.0.5
112112
# via
113113
# -c ./ingest/../base.txt
114114
# httpx
115-
httpx==0.27.0
115+
httpx==0.27.2
116116
# via
117117
# -c ./ingest/../base.txt
118118
# langchain-google-vertexai

requirements/ingest/embed-voyageai.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ httpcore==1.0.5
5353
# via
5454
# -c ./ingest/../base.txt
5555
# httpx
56-
httpx==0.27.0
56+
httpx==0.27.2
5757
# via
5858
# -c ./ingest/../base.txt
5959
# langsmith

requirements/ingest/gcs.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ google-crc32c==1.5.0
6666
# google-resumable-media
6767
google-resumable-media==2.7.2
6868
# via google-cloud-storage
69-
googleapis-common-protos==1.63.2
69+
googleapis-common-protos==1.64.0
7070
# via google-api-core
7171
idna==3.8
7272
# via

requirements/ingest/google-drive.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ google-auth==2.34.0
2626
# google-auth-httplib2
2727
google-auth-httplib2==0.2.0
2828
# via google-api-python-client
29-
googleapis-common-protos==1.63.2
29+
googleapis-common-protos==1.64.0
3030
# via google-api-core
3131
httplib2==0.22.0
3232
# via

requirements/ingest/notion.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ httpcore==1.0.5
2828
# via
2929
# -c ./ingest/../base.txt
3030
# httpx
31-
httpx==0.27.0
31+
httpx==0.27.2
3232
# via
3333
# -c ./ingest/../base.txt
3434
# notion-client

requirements/ingest/qdrant.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ httpcore==1.0.5
3939
# via
4040
# -c ./ingest/../base.txt
4141
# httpx
42-
httpx[http2]==0.27.0
42+
httpx[http2]==0.27.2
4343
# via
4444
# -c ./ingest/../base.txt
4545
# qdrant-client

requirements/ingest/singlestore.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ wheel==0.44.0
5656
# via
5757
# -c ./ingest/../deps/constraints.txt
5858
# singlestoredb
59-
zipp==3.20.0
59+
zipp==3.20.1
6060
# via importlib-metadata
6161

6262
# The following packages are considered to be unsafe in a requirements file:

requirements/ingest/weaviate.txt

+1-60
Original file line numberDiff line numberDiff line change
@@ -4,20 +4,12 @@
44
#
55
# pip-compile ./ingest/weaviate.in
66
#
7-
annotated-types==0.7.0
8-
# via pydantic
9-
anyio==4.4.0
10-
# via
11-
# -c ./ingest/../base.txt
12-
# httpx
137
authlib==1.3.2
148
# via weaviate-client
159
certifi==2024.7.4
1610
# via
1711
# -c ./ingest/../base.txt
1812
# -c ./ingest/../deps/constraints.txt
19-
# httpcore
20-
# httpx
2113
# requests
2214
cffi==1.17.0
2315
# via cryptography
@@ -27,75 +19,24 @@ charset-normalizer==3.3.2
2719
# requests
2820
cryptography==43.0.0
2921
# via authlib
30-
exceptiongroup==1.2.2
31-
# via
32-
# -c ./ingest/../base.txt
33-
# anyio
34-
grpcio==1.66.0
35-
# via
36-
# -c ./ingest/../deps/constraints.txt
37-
# grpcio-health-checking
38-
# grpcio-tools
39-
# weaviate-client
40-
grpcio-health-checking==1.62.3
41-
# via weaviate-client
42-
grpcio-tools==1.62.3
43-
# via weaviate-client
44-
h11==0.14.0
45-
# via
46-
# -c ./ingest/../base.txt
47-
# httpcore
48-
httpcore==1.0.5
49-
# via
50-
# -c ./ingest/../base.txt
51-
# httpx
52-
httpx==0.27.0
53-
# via
54-
# -c ./ingest/../base.txt
55-
# weaviate-client
5622
idna==3.8
5723
# via
5824
# -c ./ingest/../base.txt
59-
# anyio
60-
# httpx
6125
# requests
62-
protobuf==4.23.4
63-
# via
64-
# -c ./ingest/../deps/constraints.txt
65-
# grpcio-health-checking
66-
# grpcio-tools
6726
pycparser==2.22
6827
# via cffi
69-
pydantic==2.8.2
70-
# via weaviate-client
71-
pydantic-core==2.20.1
72-
# via pydantic
7328
requests==2.32.3
7429
# via
7530
# -c ./ingest/../base.txt
7631
# weaviate-client
77-
sniffio==1.3.1
78-
# via
79-
# -c ./ingest/../base.txt
80-
# anyio
81-
# httpx
82-
typing-extensions==4.12.2
83-
# via
84-
# -c ./ingest/../base.txt
85-
# anyio
86-
# pydantic
87-
# pydantic-core
8832
urllib3==1.26.19
8933
# via
9034
# -c ./ingest/../base.txt
9135
# -c ./ingest/../deps/constraints.txt
9236
# requests
9337
validators==0.33.0
9438
# via weaviate-client
95-
weaviate-client==4.7.1
39+
weaviate-client==3.26.7
9640
# via
9741
# -c ./ingest/../deps/constraints.txt
9842
# -r ./ingest/weaviate.in
99-
100-
# The following packages are considered to be unsafe in a requirements file:
101-
# setuptools

requirements/test.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ httpcore==1.0.5
6363
# via
6464
# -c ./base.txt
6565
# httpx
66-
httpx==0.27.0
66+
httpx==0.27.2
6767
# via
6868
# -c ./base.txt
6969
# label-studio-sdk

unstructured/__version__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.15.8-dev5" # pragma: no cover
1+
__version__ = "0.15.8" # pragma: no cover

unstructured/partition/pdf.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,8 @@
1515
from pdfminer.layout import LTChar, LTContainer, LTImage, LTItem, LTTextBox
1616
from pdfminer.pdftypes import PDFObjRef
1717
from pdfminer.utils import open_filename
18+
from pi_heif import register_heif_opener
1819
from PIL import Image as PILImage
19-
from pillow_heif import register_heif_opener
2020
from pypdf import PdfReader
2121

2222
from unstructured.chunking import add_chunking_strategy

0 commit comments

Comments
 (0)