Skip to content

Commit 91b892c

Browse files
authored
fix: Fix api_url param to partition_via_api (#2342)
Closes #2340

We need to make sure the custom URL is passed to our client. The client constructor takes the base URL, so for compatibility we can continue to accept the full URL and strip off the path.

To verify, run the API locally and confirm you can make calls to it.

```
# In unstructured-api
make run-web-app

# In ipython in this repo
from unstructured.partition.api import partition_via_api

filename = "example-docs/layout-parser-paper.pdf"
partition_via_api(filename=filename, api_url="http://localhost:8000")
```
1 parent 1b70ea8 commit 91b892c

File tree

4 files changed

+43
-9
lines changed

4 files changed

+43
-9
lines changed

Diff for: CHANGELOG.md

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
## 0.11.7-dev4
1+
## 0.11.7
22

33
### Enhancements
44

@@ -14,6 +14,7 @@
1414

1515
* **Fix table structure metric script** Update the call to table agent to now provide OCR tokens as required
1616
* **Fix element extraction not working when using "auto" strategy for pdf and image** If element extraction is specified, the "auto" strategy falls back to the "hi_res" strategy.
17+
* **Fix a bug when passing a custom URL to `partition_via_api`** Users who self-host the API were not able to pass their custom URL to `partition_via_api`.
1718

1819
## 0.11.6
1920

Diff for: test_unstructured/partition/test_api.py

+35-5
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,11 @@
22
import json
33
import os
44
import pathlib
5+
from unittest.mock import ANY, Mock
56

67
import pytest
78
import requests
89
from unstructured_client.general import General
9-
from unstructured_client.models.errors.sdkerror import SDKError
1010

1111
from unstructured.documents.elements import NarrativeText
1212
from unstructured.partition.api import partition_multiple_via_api, partition_via_api
@@ -45,6 +45,7 @@ def __init__(self, status_code):
4545
# layer in the new unstructured-client:
4646
# `elements_from_json(text=response.raw_response.text)`
4747
self.raw_response = MockRawResponse()
48+
self.headers = {"Content-Type": "application/json"}
4849

4950
def json(self):
5051
return json.loads(self.text)
@@ -71,6 +72,34 @@ def test_partition_via_api_from_filename(monkeypatch):
7172
assert elements[0].metadata.filetype == "message/rfc822"
7273

7374

75+
def test_partition_via_api_custom_url(monkeypatch):
76+
"""
77+
Assert that we can specify api_url and requests are sent to the right place
78+
"""
79+
mock_request = Mock(return_value=MockResponse(status_code=200))
80+
81+
monkeypatch.setattr(requests.Session, "request", mock_request)
82+
filename = os.path.join(DIRECTORY, "..", "..", "example-docs", EML_TEST_FILE)
83+
custom_url = "http://localhost:8000/general/v0/general"
84+
85+
with open(filename, "rb") as f:
86+
partition_via_api(file=f, api_url=custom_url, metadata_filename=filename)
87+
88+
mock_request.assert_called_with(
89+
"POST", custom_url, data=ANY, files=ANY, headers=ANY, params=ANY
90+
)
91+
92+
# The sdk uses the server url, so we should be able to pass that as well
93+
base_url = "http://localhost:8000"
94+
95+
with open(filename, "rb") as f:
96+
partition_via_api(file=f, api_url=base_url, metadata_filename=filename)
97+
98+
mock_request.assert_called_with(
99+
"POST", custom_url, data=ANY, files=ANY, headers=ANY, params=ANY
100+
)
101+
102+
74103
def test_partition_via_api_from_file(monkeypatch):
75104
monkeypatch.setattr(
76105
General,
@@ -181,10 +210,11 @@ def test_partition_via_api_valid_request_data_kwargs():
181210
assert isinstance(elements, list)
182211

183212

184-
def test_partition_via_api_invalid_request_data_kwargs():
185-
filename = os.path.join(DIRECTORY, "..", "..", "example-docs", "layout-parser-paper-fast.pdf")
186-
with pytest.raises(SDKError):
187-
partition_via_api(filename=filename, strategy="not_a_strategy")
213+
# Note(austin) - This test is way too noisy against the hosted api
214+
# def test_partition_via_api_invalid_request_data_kwargs():
215+
# filename = os.path.join(DIRECTORY, "..", "..", "example-docs", "layout-parser-paper-fast.pdf")
216+
# with pytest.raises(SDKError):
217+
# partition_via_api(filename=filename, strategy="not_a_strategy")
188218

189219

190220
class MockMultipleResponse:

Diff for: unstructured/__version__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.11.7-dev4" # pragma: no cover
1+
__version__ = "0.11.7" # pragma: no cover

Diff for: unstructured/partition/api.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,10 @@ def partition_via_api(
6565
"Please use metadata_filename instead.",
6666
)
6767

68-
s = UnstructuredClient(api_key_auth=api_key)
68+
# Note(austin) - the sdk takes the base url, but we have the full api_url
69+
# For consistency, just strip off the path when it's given
70+
base_url = api_url[:-19] if "/general/v0/general" in api_url else api_url
71+
sdk = UnstructuredClient(api_key_auth=api_key, server_url=base_url)
6972

7073
if filename is not None:
7174
with open(filename, "rb") as f:
@@ -89,7 +92,7 @@ def partition_via_api(
8992
files=files,
9093
**request_kwargs,
9194
)
92-
response = s.general.partition(req)
95+
response = sdk.general.partition(req)
9396

9497
if response.status_code == 200:
9598
return elements_from_json(text=response.raw_response.text)

0 commit comments

Comments
 (0)