Skip to content

Commit b354e8e

Browse files
authored
Chore: Allow passing kwargs to request data field (#716)
* bump again :( * update to kwarg * add test case * rename to request_kwargs * remove install detectron2 * pip compile * add changelog for remove detectron2 install * resolve weaviate import issue on python 3.9
1 parent fc53277 commit b354e8e

File tree

10 files changed

+86
-118
lines changed

10 files changed

+86
-118
lines changed

Diff for: CHANGELOG.md

+5-1
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,18 @@
1-
## 0.7.4-dev0
1+
## 0.7.4
22

33
### Enhancements
44

5+
* Allows passing kwargs to the request data field for `partition_via_api` and `partition_multiple_via_api`
56
* Enable MIME type detection if libmagic is not available
67
* Adds handling for empty files in `detect_filetype` and `partition`.
78

89
### Features
910

1011
### Fixes
1112

13+
* Resolve `grpcio` import issue on `weaviate.schema.validate_schema` for Python 3.9 and 3.10
14+
* Remove building `detectron2` from source in Dockerfile
15+
1216
## 0.7.3
1317

1418
### Enhancements

Diff for: Dockerfile

-1
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@ RUN python3.8 -m pip install pip==${PIP_VERSION} && \
2929
pip install --no-cache -r requirements/ingest-slack.txt && \
3030
pip install --no-cache -r requirements/ingest-wikipedia.txt && \
3131
pip install --no-cache -r requirements/local-inference.txt && \
32-
pip install --no-cache "detectron2@git+https://github.com/facebookresearch/detectron2.git@e2ce8dc#egg=detectron2" && \
3332
dnf -y groupremove "Development Tools" && \
3433
dnf clean all
3534

Diff for: requirements/dev.txt

+6-2
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,10 @@ anyio==3.7.0
88
# via
99
# -c requirements/base.txt
1010
# jupyter-server
11+
appnope==0.1.3
12+
# via
13+
# ipykernel
14+
# ipython
1115
argon2-cffi==21.3.0
1216
# via
1317
# jupyter-server
@@ -59,7 +63,7 @@ executing==1.2.0
5963
# via stack-data
6064
fastjsonschema==2.17.1
6165
# via nbformat
62-
filelock==3.12.0
66+
filelock==3.12.1
6367
# via virtualenv
6468
fqdn==1.5.1
6569
# via jsonschema
@@ -215,7 +219,7 @@ pip-tools==6.13.0
215219
# via -r requirements/dev.in
216220
pkgutil-resolve-name==1.3.10
217221
# via jsonschema
218-
platformdirs==3.5.1
222+
platformdirs==3.5.3
219223
# via
220224
# -c requirements/test.txt
221225
# jupyter-core

Diff for: requirements/huggingface.txt

+2-48
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,11 @@ click==8.1.3
1717
# via
1818
# -c requirements/base.txt
1919
# sacremoses
20-
cmake==3.26.4
21-
# via triton
22-
filelock==3.12.0
20+
filelock==3.12.1
2321
# via
2422
# huggingface-hub
2523
# torch
2624
# transformers
27-
# triton
2825
fsspec==2023.6.0
2926
# via huggingface-hub
3027
huggingface-hub==0.15.1
@@ -41,8 +38,6 @@ joblib==1.2.0
4138
# sacremoses
4239
langdetect==1.0.9
4340
# via -r requirements/huggingface.in
44-
lit==16.0.5.post0
45-
# via triton
4641
markupsafe==2.1.3
4742
# via jinja2
4843
mpmath==1.3.0
@@ -53,31 +48,6 @@ numpy==1.23.5
5348
# via
5449
# -c requirements/base.txt
5550
# transformers
56-
nvidia-cublas-cu11==11.10.3.66
57-
# via
58-
# nvidia-cudnn-cu11
59-
# nvidia-cusolver-cu11
60-
# torch
61-
nvidia-cuda-cupti-cu11==11.7.101
62-
# via torch
63-
nvidia-cuda-nvrtc-cu11==11.7.99
64-
# via torch
65-
nvidia-cuda-runtime-cu11==11.7.99
66-
# via torch
67-
nvidia-cudnn-cu11==8.5.0.96
68-
# via torch
69-
nvidia-cufft-cu11==10.9.0.58
70-
# via torch
71-
nvidia-curand-cu11==10.2.10.91
72-
# via torch
73-
nvidia-cusolver-cu11==11.4.0.1
74-
# via torch
75-
nvidia-cusparse-cu11==11.7.4.91
76-
# via torch
77-
nvidia-nccl-cu11==2.14.3
78-
# via torch
79-
nvidia-nvtx-cu11==11.7.91
80-
# via torch
8151
packaging==23.1
8252
# via
8353
# -c requirements/base.txt
@@ -113,9 +83,7 @@ sympy==1.12
11383
tokenizers==0.13.3
11484
# via transformers
11585
torch==2.0.1
116-
# via
117-
# -r requirements/huggingface.in
118-
# triton
86+
# via -r requirements/huggingface.in
11987
tqdm==4.65.0
12088
# via
12189
# -c requirements/base.txt
@@ -124,8 +92,6 @@ tqdm==4.65.0
12492
# transformers
12593
transformers==4.30.1
12694
# via -r requirements/huggingface.in
127-
triton==2.0.0
128-
# via torch
12995
typing-extensions==4.6.3
13096
# via
13197
# -c requirements/base.txt
@@ -136,15 +102,3 @@ urllib3==1.26.16
136102
# -c requirements/base.txt
137103
# -c requirements/constraints.in
138104
# requests
139-
wheel==0.40.0
140-
# via
141-
# -c requirements/constraints.in
142-
# nvidia-cublas-cu11
143-
# nvidia-cuda-cupti-cu11
144-
# nvidia-cuda-runtime-cu11
145-
# nvidia-curand-cu11
146-
# nvidia-cusparse-cu11
147-
# nvidia-nvtx-cu11
148-
149-
# The following packages are considered to be unsafe in a requirements file:
150-
# setuptools

Diff for: requirements/local-inference.txt

+2-47
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,6 @@ charset-normalizer==3.1.0
2020
# -c requirements/base.txt
2121
# pdfminer-six
2222
# requests
23-
cmake==3.26.4
24-
# via triton
2523
coloredlogs==15.0.1
2624
# via onnxruntime
2725
contourpy==1.0.7
@@ -34,15 +32,14 @@ cycler==0.11.0
3432
# via matplotlib
3533
effdet==0.4.1
3634
# via layoutparser
37-
filelock==3.12.0
35+
filelock==3.12.1
3836
# via
3937
# huggingface-hub
4038
# torch
4139
# transformers
42-
# triton
4340
flatbuffers==23.5.26
4441
# via onnxruntime
45-
fonttools==4.39.4
42+
fonttools==4.40.0
4643
# via matplotlib
4744
fsspec==2023.6.0
4845
# via huggingface-hub
@@ -67,8 +64,6 @@ kiwisolver==1.4.4
6764
# via matplotlib
6865
layoutparser[layoutmodels,tesseract]==0.3.4
6966
# via unstructured-inference
70-
lit==16.0.5.post0
71-
# via triton
7267
markupsafe==2.1.3
7368
# via jinja2
7469
matplotlib==3.7.1
@@ -90,31 +85,6 @@ numpy==1.23.5
9085
# scipy
9186
# torchvision
9287
# transformers
93-
nvidia-cublas-cu11==11.10.3.66
94-
# via
95-
# nvidia-cudnn-cu11
96-
# nvidia-cusolver-cu11
97-
# torch
98-
nvidia-cuda-cupti-cu11==11.7.101
99-
# via torch
100-
nvidia-cuda-nvrtc-cu11==11.7.99
101-
# via torch
102-
nvidia-cuda-runtime-cu11==11.7.99
103-
# via torch
104-
nvidia-cudnn-cu11==8.5.0.96
105-
# via torch
106-
nvidia-cufft-cu11==10.9.0.58
107-
# via torch
108-
nvidia-curand-cu11==10.2.10.91
109-
# via torch
110-
nvidia-cusolver-cu11==11.4.0.1
111-
# via torch
112-
nvidia-cusparse-cu11==11.7.4.91
113-
# via torch
114-
nvidia-nccl-cu11==2.14.3
115-
# via torch
116-
nvidia-nvtx-cu11==11.7.91
117-
# via torch
11888
omegaconf==2.3.0
11989
# via effdet
12090
onnxruntime==1.15.0
@@ -220,7 +190,6 @@ torch==2.0.1
220190
# layoutparser
221191
# timm
222192
# torchvision
223-
# triton
224193
torchvision==0.15.2
225194
# via
226195
# effdet
@@ -234,8 +203,6 @@ tqdm==4.65.0
234203
# transformers
235204
transformers==4.30.1
236205
# via unstructured-inference
237-
triton==2.0.0
238-
# via torch
239206
typing-extensions==4.6.3
240207
# via
241208
# -c requirements/base.txt
@@ -251,19 +218,7 @@ urllib3==1.26.16
251218
# requests
252219
wand==0.6.11
253220
# via pdfplumber
254-
wheel==0.40.0
255-
# via
256-
# -c requirements/constraints.in
257-
# nvidia-cublas-cu11
258-
# nvidia-cuda-cupti-cu11
259-
# nvidia-cuda-runtime-cu11
260-
# nvidia-curand-cu11
261-
# nvidia-cusparse-cu11
262-
# nvidia-nvtx-cu11
263221
zipp==3.15.0
264222
# via
265223
# -c requirements/base.txt
266224
# importlib-resources
267-
268-
# The following packages are considered to be unsafe in a requirements file:
269-
# setuptools

Diff for: requirements/test.in

+1
Original file line numberDiff line numberDiff line change
@@ -17,3 +17,4 @@ types-Markdown
1717
types-requests
1818
types-tabulate
1919
vcrpy
20+
grpcio

Diff for: requirements/test.txt

+4-2
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@ flake8==6.0.0
3434
# via -r requirements/test.in
3535
freezegun==1.2.2
3636
# via -r requirements/test.in
37+
grpcio==1.54.2
38+
# via -r requirements/test.in
3739
idna==3.4
3840
# via
3941
# -c requirements/base.txt
@@ -67,7 +69,7 @@ packaging==23.1
6769
# pytest
6870
pathspec==0.11.1
6971
# via black
70-
platformdirs==3.5.1
72+
platformdirs==3.5.3
7173
# via black
7274
pluggy==1.0.0
7375
# via pytest
@@ -79,7 +81,7 @@ pydantic==1.10.9
7981
# label-studio-sdk
8082
pyflakes==3.0.1
8183
# via flake8
82-
pytest==7.3.1
84+
pytest==7.3.2
8385
# via
8486
# pytest-cov
8587
# pytest-mock

Diff for: test_unstructured/partition/test_api.py

+37
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,19 @@ def test_partition_via_api_raises_with_bad_response(monkeypatch):
9292
partition_via_api(filename=filename, api_key="FAKEROO")
9393

9494

95+
def test_partition_via_api_valid_request_data_kwargs():
96+
filename = os.path.join(DIRECTORY, "..", "..", "example-docs", "layout-parser-paper-fast.pdf")
97+
98+
elements = partition_via_api(filename=filename, api_key="FAKEROO", strategy="fast")
99+
assert isinstance(elements, list)
100+
101+
102+
def test_partition_via_api_invalid_request_data_kwargs():
103+
filename = os.path.join(DIRECTORY, "..", "..", "example-docs", "layout-parser-paper-fast.pdf")
104+
with pytest.raises(ValueError):
105+
partition_via_api(filename=filename, api_key="FAKEROO", strategy="not_a_strategy")
106+
107+
95108
class MockMultipleResponse:
96109
def __init__(self, status_code):
97110
self.status_code = status_code
@@ -276,3 +289,27 @@ def test_partition_multiple_via_api_from_files_raises_without_filenames(monkeypa
276289
files=files,
277290
api_key="FAKEROO",
278291
)
292+
293+
294+
def test_partition_multiple_via_api_valid_request_data_kwargs():
295+
filenames = [
296+
os.path.join(DIRECTORY, "..", "..", "example-docs", "layout-parser-paper-fast.pdf"),
297+
os.path.join(DIRECTORY, "..", "..", "example-docs", "layout-parser-paper-fast.jpg"),
298+
]
299+
300+
elements = partition_multiple_via_api(filenames=filenames, api_key="FAKEROO", strategy="fast")
301+
assert isinstance(elements, list)
302+
303+
304+
def test_partition_multiple_via_api_invalid_request_data_kwargs():
305+
filenames = [
306+
os.path.join(DIRECTORY, "..", "..", "example-docs", "layout-parser-paper-fast.pdf"),
307+
os.path.join(DIRECTORY, "..", "..", "example-docs", "layout-parser-paper-fast.jpg"),
308+
]
309+
310+
with pytest.raises(ValueError):
311+
partition_multiple_via_api(
312+
filenames=filenames,
313+
api_key="FAKEROO",
314+
strategy="not_a_strategy",
315+
)

Diff for: unstructured/__version__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.7.4-dev0" # pragma: no cover
1+
__version__ = "0.7.4" # pragma: no cover

0 commit comments

Comments
 (0)