Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 6dd7c30

Browse files
authored Apr 9, 2025
feat/add support for persisting file data changes (#49)
1 parent a43aa42 commit 6dd7c30

File tree

5 files changed

+20
-4
lines changed

5 files changed

+20
-4
lines changed
 

Diff for: CHANGELOG.md

+4
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,7 @@
1+
## 0.0.29
2+
3+
* **Support persisting file data changes**
4+
15
## 0.0.28
26

37
* **Isolate what gets bundled in package**

Diff for: test/api/test_api.py

+5-1
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
from pathlib import Path
2-
from typing import Any, Optional
2+
from typing import Any, Optional, Union
33

44
import pytest
55
from fastapi.testclient import TestClient
@@ -25,6 +25,7 @@ class InvokeResponse(BaseModel):
2525
filedata_meta: FileDataMeta
2626
status_code_text: Optional[str] = None
2727
output: Optional[Any] = None
28+
file_data: Optional[Union[FileData, BatchFileData]] = None
2829

2930
def generic_validation(self):
3031
assert self.status_code == 200
@@ -121,6 +122,9 @@ def test_filedata_meta(file_data):
121122
filedata_meta = invoke_response.filedata_meta
122123
assert len(filedata_meta.new_records) == 15
123124
assert filedata_meta.terminate_current
125+
file_data = invoke_response.file_data
126+
assert file_data
127+
assert file_data.metadata.record_locator.get("key") == "value"
124128
assert not invoke_response.output
125129

126130

Diff for: test/assets/filedata_meta.py

+1
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@ class Output(BaseModel):
1818
def process_input(
1919
i: Input, file_data: Union[FileData, BatchFileData], filedata_meta: FileDataMeta
2020
) -> Optional[Output]:
21+
file_data.metadata.record_locator = {"key": "value"}
2122
if i.m > 10:
2223
filedata_meta.terminate_current = True
2324
new_content = [

Diff for: unstructured_platform_plugins/__version__.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -1 +1 @@
1-
__version__ = "0.0.28" # pragma: no cover
1+
__version__ = "0.0.29" # pragma: no cover

Diff for: unstructured_platform_plugins/etl_uvicorn/api_generator.py

+9-2
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@
1111
from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor
1212
from pydantic import BaseModel, Field, create_model
1313
from starlette.responses import RedirectResponse
14-
from unstructured_ingest.data_types.file_data import file_data_from_dict
14+
from unstructured_ingest.data_types.file_data import BatchFileData, FileData, file_data_from_dict
1515
from uvicorn.config import LOG_LEVELS
1616
from uvicorn.importer import import_from_string
1717

@@ -31,6 +31,8 @@
3131
schema_to_base_model,
3232
)
3333

34+
FileDataType = Union[FileData, BatchFileData]
35+
3436

3537
class EtlApiException(Exception):
3638
pass
@@ -137,7 +139,8 @@ def _wrap_in_fastapi(
137139
class InvokeResponse(BaseModel):
138140
usage: list[UsageData]
139141
status_code: int
140-
filedata_meta: Optional[filedata_meta_model]
142+
file_data: Optional[FileDataType] = None
143+
filedata_meta: Optional[filedata_meta_model] = None
141144
status_code_text: Optional[str] = None
142145
output: Optional[response_type] = None
143146
message_channels: MessageChannels = Field(default_factory=MessageChannels)
@@ -177,6 +180,7 @@ async def _stream_response():
177180
),
178181
status_code=status.HTTP_200_OK,
179182
output=output,
183+
file_data=request_dict.get("file_data", None),
180184
).model_dump_json()
181185
+ "\n"
182186
)
@@ -202,6 +206,7 @@ async def _stream_response():
202206
filedata_meta=filedata_meta_model.model_validate(filedata_meta.model_dump()),
203207
status_code=status.HTTP_200_OK,
204208
output=output,
209+
file_data=request_dict.get("file_data", None),
205210
)
206211
except UnrecoverableException as ex:
207212
logger.info("Unrecoverable error occurred during plugin invocation")
@@ -211,6 +216,7 @@ async def _stream_response():
211216
status_code=512,
212217
status_code_text=ex.message,
213218
filedata_meta=filedata_meta_model.model_validate(filedata_meta.model_dump()),
219+
file_data=request_dict.get("file_data", None),
214220
)
215221
except Exception as invoke_error:
216222
logger.error(f"failed to invoke plugin: {invoke_error}", exc_info=True)
@@ -221,6 +227,7 @@ async def _stream_response():
221227
filedata_meta=filedata_meta_model.model_validate(filedata_meta.model_dump()),
222228
status_code=http_error.status_code,
223229
status_code_text=f"[{invoke_error.__class__.__name__}] {invoke_error}",
230+
file_data=request_dict.get("file_data", None),
224231
)
225232

226233
if input_schema_model.model_fields:

0 commit comments

Comments
 (0)
Please sign in to comment.