Skip to content

Commit 9b45894

Browse files
authored
release: 0.0.82 (#481)
Per the CHANGELOG update for 0.0.82: bump to `unstructured` 0.16.11; no longer attempts to download the NLTK asset from S3, which could result in a 403.
1 parent d9afddf commit 9b45894

File tree

7 files changed

+185
-309
lines changed

7 files changed

+185
-309
lines changed

CHANGELOG.md

+5
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,8 @@
1+
## 0.0.82
2+
3+
* Bump to `unstructured` 0.16.11
4+
* No longer attempts to download NLTK asset from S3 which could result in a 403
5+
16
## 0.0.81
27

38
* Update `strategy` parameter to allow `'` and `"` as input surrounding the value.

prepline_general/api/app.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@
1313
app = FastAPI(
1414
title="Unstructured Pipeline API",
1515
summary="Partition documents with the Unstructured library",
16-
version="0.0.81",
16+
version="0.0.82",
1717
docs_url="/general/docs",
1818
openapi_url="/general/openapi.json",
1919
servers=[

prepline_general/api/general.py

+2-53
Original file line number | Diff line number | Diff line change
@@ -43,7 +43,6 @@
4343
elements_from_json,
4444
)
4545
from unstructured_inference.models.base import UnknownModelException
46-
from unstructured_inference.models.chipper import MODEL_TYPES as CHIPPER_MODEL_TYPES
4746

4847
app = FastAPI()
4948
router = APIRouter()
@@ -214,37 +213,6 @@ def partition_pdf_splits(
214213
return results
215214

216215

217-
is_chipper_processing = False
218-
219-
220-
class ChipperMemoryProtection:
221-
"""Chipper calls are expensive, and right now we can only do one call at a time.
222-
223-
If the model is in use, return a 503 error. The API should scale up and the user can try again
224-
on a different server.
225-
"""
226-
227-
def __enter__(self):
228-
global is_chipper_processing
229-
if is_chipper_processing:
230-
# Log here so we can track how often it happens
231-
logger.error("Chipper is already is use")
232-
raise HTTPException(
233-
status_code=503, detail="Server is under heavy load. Please try again later."
234-
)
235-
236-
is_chipper_processing = True
237-
238-
def __exit__(
239-
self,
240-
exc_type: Optional[type[BaseException]],
241-
exc_value: Optional[BaseException],
242-
exc_tb: Optional[TracebackType],
243-
):
244-
global is_chipper_processing
245-
is_chipper_processing = False
246-
247-
248216
def pipeline_api(
249217
file: IO[bytes],
250218
request: Request,
@@ -331,7 +299,6 @@ def pipeline_api(
331299
if file_content_type == "application/pdf":
332300
_check_pdf(file)
333301

334-
hi_res_model_name = _validate_hi_res_model_name(hi_res_model_name, coordinates)
335302
strategy = _validate_strategy(strategy)
336303
pdf_infer_table_structure = _set_pdf_infer_table_structure(
337304
pdf_infer_table_structure,
@@ -417,9 +384,6 @@ def pipeline_api(
417384
coordinates=coordinates,
418385
**partition_kwargs, # type: ignore # pyright: ignore[reportGeneralTypeIssues]
419386
)
420-
elif hi_res_model_name and hi_res_model_name in CHIPPER_MODEL_TYPES:
421-
with ChipperMemoryProtection():
422-
elements = partition(**partition_kwargs) # type: ignore # pyright: ignore[reportGeneralTypeIssues]
423387
else:
424388
elements = partition(**partition_kwargs) # type: ignore # pyright: ignore[reportGeneralTypeIssues]
425389

@@ -533,21 +497,6 @@ def _validate_strategy(strategy: str) -> str:
533497
return strategy
534498

535499

536-
def _validate_hi_res_model_name(
537-
hi_res_model_name: Optional[str], show_coordinates: bool
538-
) -> Optional[str]:
539-
# Make sure chipper aliases to the latest model
540-
if hi_res_model_name and hi_res_model_name == "chipper":
541-
hi_res_model_name = "chipperv2"
542-
543-
if hi_res_model_name and hi_res_model_name in CHIPPER_MODEL_TYPES and show_coordinates:
544-
raise HTTPException(
545-
status_code=400,
546-
detail=f"coordinates aren't available when using the {hi_res_model_name} model type",
547-
)
548-
return hi_res_model_name
549-
550-
551500
def _validate_chunking_strategy(chunking_strategy: Optional[str]) -> Optional[str]:
552501
"""Raise on `chunking_strategy` is not a valid chunking strategy name.
553502
@@ -653,7 +602,7 @@ def return_content_type(filename: str):
653602

654603

655604
@router.get("/general/v0/general", include_in_schema=False)
656-
@router.get("/general/v0.0.81/general", include_in_schema=False)
605+
@router.get("/general/v0.0.82/general", include_in_schema=False)
657606
async def handle_invalid_get_request():
658607
raise HTTPException(
659608
status_code=status.HTTP_405_METHOD_NOT_ALLOWED, detail="Only POST requests are supported."
@@ -668,7 +617,7 @@ async def handle_invalid_get_request():
668617
description="Description",
669618
operation_id="partition_parameters",
670619
)
671-
@router.post("/general/v0.0.81/general", include_in_schema=False)
620+
@router.post("/general/v0.0.82/general", include_in_schema=False)
672621
def general_partition(
673622
request: Request,
674623
# cannot use annotated type here because of a bug described here:

preprocessing-pipeline-family.yaml

+1-1
Original file line number | Diff line number | Diff line change
@@ -1,2 +1,2 @@
11
name: general
2-
version: 0.0.81
2+
version: 0.0.82

0 commit comments

Comments
 (0)