diff --git a/CHANGELOG.md b/CHANGELOG.md index 9cbd7a4d..9709412b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +## 0.0.83 + +* Fix Starlette vulnerability + ## 0.0.82 * Patch various python CVEs diff --git a/prepline_general/api/app.py b/prepline_general/api/app.py index 798d1815..d8301c90 100644 --- a/prepline_general/api/app.py +++ b/prepline_general/api/app.py @@ -13,7 +13,7 @@ app = FastAPI( title="Unstructured Pipeline API", summary="Partition documents with the Unstructured library", - version="0.0.82", + version="0.0.83", docs_url="/general/docs", openapi_url="/general/openapi.json", servers=[ diff --git a/prepline_general/api/general.py b/prepline_general/api/general.py index e378ab04..649ccfa5 100644 --- a/prepline_general/api/general.py +++ b/prepline_general/api/general.py @@ -602,7 +602,7 @@ def return_content_type(filename: str): @router.get("/general/v0/general", include_in_schema=False) -@router.get("/general/v0.0.82/general", include_in_schema=False) +@router.get("/general/v0.0.83/general", include_in_schema=False) async def handle_invalid_get_request(): raise HTTPException( status_code=status.HTTP_405_METHOD_NOT_ALLOWED, detail="Only POST requests are supported." @@ -617,7 +617,7 @@ async def handle_invalid_get_request(): description="Description", operation_id="partition_parameters", ) -@router.post("/general/v0.0.82/general", include_in_schema=False) +@router.post("/general/v0.0.83/general", include_in_schema=False) def general_partition( request: Request, # cannot use annotated type here because of a bug described here: @@ -714,7 +714,7 @@ def response_generator(is_multipart: bool): ) def join_responses( - responses: Sequence[str | List[Dict[str, Any]] | PlainTextResponse] + responses: Sequence[str | List[Dict[str, Any]] | PlainTextResponse], ) -> List[str | List[Dict[str, Any]]] | PlainTextResponse: """Consolidate partitionings from multiple documents into single response payload.""" if form_params.output_format != "text/csv": diff --git a/preprocessing-pipeline-family.yaml b/preprocessing-pipeline-family.yaml index 116f88e1..3addd95b 100644 --- a/preprocessing-pipeline-family.yaml +++ b/preprocessing-pipeline-family.yaml @@ -1,2 +1,2 @@ name: general -version: 0.0.82 +version: 0.0.83 diff --git a/requirements/base.in b/requirements/base.in index fda4c50f..8a94d232 100644 --- a/requirements/base.in +++ b/requirements/base.in @@ -4,11 +4,7 @@ unstructured[all-docs] # can remove after black drops support for Python 3.6 # ref: https://github.com/psf/black/issues/2964 click==8.1.3 -# NOTE(robinson) - fastapi>=0.114.0 causes the test listed below to fail, though it -# works if data if chunking strategy and new_after_n_chars are explicitly set. Pinning -# for now to preserve behavior -# test_parallel_mode_preserves_uniqueness_of_hashes_when_assembling_page_splits -fastapi<0.114.0 +fastapi uvicorn ratelimit requests diff --git a/requirements/base.txt b/requirements/base.txt index 61458431..fe3d9f56 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -1,8 +1,8 @@ # -# This file is autogenerated by pip-compile with Python 3.11 +# This file is autogenerated by pip-compile with Python 3.12 # by the following command: # -# pip-compile --config=pyproject.toml requirements/base.in +# pip-compile requirements/base.in # aiofiles==24.1.0 # via unstructured-client @@ -10,7 +10,7 @@ annotated-types==0.7.0 # via pydantic antlr4-python3-runtime==4.9.3 # via omegaconf -anyio==4.7.0 +anyio==4.8.0 # via # httpx # starlette @@ -20,9 +20,9 @@ backoff==2.2.1 # unstructured beautifulsoup4==4.12.3 # via unstructured -cachetools==5.5.0 +cachetools==5.5.1 # via google-auth -certifi==2024.8.30 +certifi==2024.12.14 # via # httpcore # httpx @@ -31,7 +31,7 @@ cffi==1.17.1 # via cryptography chardet==5.2.0 # via unstructured -charset-normalizer==3.4.0 +charset-normalizer==3.4.1 # via # pdfminer-six # requests @@ -53,36 +53,36 @@ cycler==0.12.1 # via matplotlib dataclasses-json==0.6.7 # via unstructured -deprecated==1.2.15 +deprecated==1.2.18 # via pikepdf effdet==0.4.1 # via unstructured -emoji==2.14.0 +emoji==2.14.1 # via unstructured et-xmlfile==2.0.0 # via openpyxl -eval-type-backport==0.2.0 +eval-type-backport==0.2.2 # via unstructured-client -fastapi==0.113.0 +fastapi==0.115.8 # via -r requirements/base.in -filelock==3.16.1 +filelock==3.17.0 # via # huggingface-hub # torch # transformers filetype==1.2.0 # via unstructured -flatbuffers==24.3.25 +flatbuffers==25.1.24 # via onnxruntime -fonttools==4.55.3 +fonttools==4.55.8 # via matplotlib -fsspec==2024.10.0 +fsspec==2024.12.0 # via # huggingface-hub # torch -google-api-core[grpc]==2.24.0 +google-api-core[grpc]==2.24.1 # via google-cloud-vision -google-auth==2.37.0 +google-auth==2.38.0 # via # google-api-core # google-cloud-vision @@ -92,11 +92,11 @@ googleapis-common-protos==1.66.0 # via # google-api-core # grpcio-status -grpcio==1.68.1 +grpcio==1.70.0 # via # google-api-core # grpcio-status -grpcio-status==1.68.1 +grpcio-status==1.70.0 # via google-api-core h11==0.14.0 # via @@ -108,7 +108,7 @@ httpcore==1.0.7 # via httpx httpx==0.28.1 # via unstructured-client -huggingface-hub==0.26.5 +huggingface-hub==0.28.1 # via # timm # tokenizers @@ -121,20 +121,16 @@ idna==3.10 # anyio # httpx # requests -iopath==0.1.10 - # via layoutparser -jinja2==3.1.4 +jinja2==3.1.5 # via torch joblib==1.4.2 # via nltk jsonpath-python==1.0.6 # via unstructured-client -kiwisolver==1.4.7 +kiwisolver==1.4.8 # via matplotlib langdetect==1.0.9 # via unstructured -layoutparser==0.3.4 - # via unstructured-inference lxml==5.3.0 # via # pikepdf @@ -145,9 +141,9 @@ markdown==3.7 # via unstructured markupsafe==3.0.2 # via jinja2 -marshmallow==3.23.1 +marshmallow==3.26.0 # via dataclasses-json -matplotlib==3.9.4 +matplotlib==3.10.0 # via # pycocotools # unstructured-inference @@ -167,7 +163,6 @@ numpy==1.26.4 # via # -c requirements/constraints.in # contourpy - # layoutparser # matplotlib # onnx # onnxruntime @@ -189,10 +184,8 @@ onnx==1.17.0 # unstructured-inference onnxruntime==1.20.1 # via unstructured-inference -opencv-python==4.10.0.84 - # via - # layoutparser - # unstructured-inference +opencv-python==4.11.0.86 + # via unstructured-inference openpyxl==3.1.5 # via unstructured packaging==24.2 @@ -206,40 +199,32 @@ packaging==24.2 # unstructured-pytesseract pandas==2.2.3 # via - # layoutparser # unstructured + # unstructured-inference pdf2image==1.17.0 + # via unstructured +pdfminer-six==20240706 # via - # layoutparser - # unstructured -pdfminer-six==20231228 - # via - # pdfplumber # unstructured -pdfplumber==0.11.4 - # via layoutparser + # unstructured-inference pi-heif==0.21.0 # via unstructured -pikepdf==9.4.2 +pikepdf==9.5.1 # via unstructured -pillow==11.0.0 +pillow==11.1.0 # via - # layoutparser # matplotlib # pdf2image - # pdfplumber # pi-heif # pikepdf # python-pptx # torchvision # unstructured-pytesseract -portalocker==3.0.0 - # via iopath -proto-plus==1.25.0 +proto-plus==1.26.0 # via # google-api-core # google-cloud-vision -protobuf==5.29.1 +protobuf==5.29.3 # via # google-api-core # google-cloud-vision @@ -248,7 +233,7 @@ protobuf==5.29.1 # onnx # onnxruntime # proto-plus -psutil==6.1.0 +psutil==6.1.1 # via # -r requirements/base.in # unstructured @@ -264,23 +249,23 @@ pycparser==2.22 # via cffi pycryptodome==3.21.0 # via -r requirements/base.in -pydantic==2.9.2 +pydantic==2.10.6 # via # fastapi # unstructured-client -pydantic-core==2.23.4 +pydantic-core==2.27.2 # via pydantic -pypandoc==1.14 +pypandoc==1.15 # via unstructured -pyparsing==3.2.0 +pyparsing==3.2.1 # via matplotlib -pypdf==5.1.0 +pypdf==5.2.0 # via # -r requirements/base.in # unstructured # unstructured-client -pypdfium2==4.30.0 - # via pdfplumber +pypdfium2==4.30.1 + # via unstructured-inference python-dateutil==2.9.0.post0 # via # matplotlib @@ -288,11 +273,11 @@ python-dateutil==2.9.0.post0 # unstructured-client python-docx==1.1.2 # via unstructured -python-iso639==2024.10.22 +python-iso639==2025.1.28 # via unstructured python-magic==0.4.27 # via unstructured -python-multipart==0.0.19 +python-multipart==0.0.20 # via unstructured-inference python-oxmsg==0.0.1 # via unstructured @@ -303,11 +288,10 @@ pytz==2024.2 pyyaml==6.0.2 # via # huggingface-hub - # layoutparser # omegaconf # timm # transformers -rapidfuzz==3.10.1 +rapidfuzz==3.12.1 # via # unstructured # unstructured-inference @@ -329,12 +313,12 @@ requests-toolbelt==1.0.0 # via unstructured-client rsa==4.9 # via google-auth -safetensors==0.4.5 +safetensors==0.5.2 # via # timm # transformers -scipy==1.14.1 - # via layoutparser +scipy==1.15.1 + # via unstructured-inference six==1.17.0 # via # html5lib @@ -344,43 +328,43 @@ sniffio==1.3.1 # via anyio soupsieve==2.6 # via beautifulsoup4 -starlette==0.38.6 - # via fastapi +starlette==0.41.2 + # via + # -c requirements/constraints.in + # fastapi sympy==1.13.1 # via # onnxruntime # torch -timm==1.0.12 +timm==1.0.14 # via # effdet # unstructured-inference tokenizers==0.21.0 # via transformers -torch==2.5.1 +torch==2.6.0 # via # effdet # timm # torchvision # unstructured-inference -torchvision==0.20.1 +torchvision==0.21.0 # via # effdet # timm tqdm==4.67.1 # via # huggingface-hub - # iopath # nltk # transformers # unstructured -transformers==4.47.0 +transformers==4.48.2 # via unstructured-inference typing-extensions==4.12.2 # via # anyio # fastapi # huggingface-hub - # iopath # pydantic # pydantic-core # python-docx @@ -393,27 +377,30 @@ typing-inspect==0.9.0 # via # dataclasses-json # unstructured-client -tzdata==2024.2 +tzdata==2025.1 # via pandas -unstructured[all-docs]==0.16.11 +unstructured[all-docs]==0.16.17 # via -r requirements/base.in -unstructured-client==0.28.1 +unstructured-client==0.29.0 # via unstructured -unstructured-inference==0.8.1 +unstructured-inference==0.8.6 # via unstructured unstructured-pytesseract==0.3.13 # via unstructured -urllib3==2.2.3 +urllib3==2.3.0 # via requests -uvicorn==0.32.1 +uvicorn==0.34.0 # via -r requirements/base.in webencodings==0.5.1 # via html5lib -wrapt==1.17.0 +wrapt==1.17.2 # via # deprecated # unstructured xlrd==2.0.1 # via unstructured -xlsxwriter==3.2.0 +xlsxwriter==3.2.2 # via python-pptx + +# The following packages are considered to be unsafe in a requirements file: +# setuptools diff --git a/requirements/constraints.in b/requirements/constraints.in index 94c33a81..f213ea10 100644 --- a/requirements/constraints.in +++ b/requirements/constraints.in @@ -4,3 +4,5 @@ # words, if something does not require a constraint, it will not be installed. #################################################################################################### numpy<2.0.0 +# later versions of Starlette break middleware +starlette==0.41.2 \ No newline at end of file diff --git a/requirements/test.txt b/requirements/test.txt index efbac57c..43837228 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -1,8 +1,8 @@ # -# This file is autogenerated by pip-compile with Python 3.11 +# This file is autogenerated by pip-compile with Python 3.12 # by the following command: # -# pip-compile --config=pyproject.toml --output-file=requirements/test.txt requirements/base.txt requirements/test.in +# pip-compile --output-file=requirements/test.txt requirements/base.txt requirements/test.in # aiofiles==24.1.0 # via @@ -16,7 +16,7 @@ antlr4-python3-runtime==4.9.3 # via # -r requirements/base.txt # omegaconf -anyio==4.7.0 +anyio==4.8.0 # via # -r requirements/base.txt # httpx @@ -38,7 +38,7 @@ astunparse==1.6.3 # via nbdev async-lru==2.0.4 # via jupyterlab -attrs==24.2.0 +attrs==25.1.0 # via # jsonschema # referencing @@ -53,15 +53,15 @@ beautifulsoup4==4.12.3 # -r requirements/base.txt # nbconvert # unstructured -black==24.10.0 +black==25.1.0 # via -r requirements/test.in -bleach==6.2.0 +bleach[css]==6.2.0 # via nbconvert -cachetools==5.5.0 +cachetools==5.5.1 # via # -r requirements/base.txt # google-auth -certifi==2024.8.30 +certifi==2024.12.14 # via # -r requirements/base.txt # httpcore @@ -76,7 +76,7 @@ chardet==5.2.0 # via # -r requirements/base.txt # unstructured -charset-normalizer==3.4.0 +charset-normalizer==3.4.1 # via # -r requirements/base.txt # pdfminer-six @@ -101,7 +101,7 @@ contourpy==1.3.1 # via # -r requirements/base.txt # matplotlib -coverage[toml]==7.6.9 +coverage[toml]==7.6.10 # via pytest-cov cryptography==44.0.0 # via @@ -116,15 +116,15 @@ dataclasses-json==0.6.7 # via # -r requirements/base.txt # unstructured -debugpy==1.8.11 +debugpy==1.8.12 # via ipykernel decorator==5.1.1 # via ipython -deepdiff==8.0.1 +deepdiff==8.1.1 # via -r requirements/test.in defusedxml==0.7.1 # via nbconvert -deprecated==1.2.15 +deprecated==1.2.18 # via # -r requirements/base.txt # pikepdf @@ -132,7 +132,7 @@ effdet==0.4.1 # via # -r requirements/base.txt # unstructured -emoji==2.14.0 +emoji==2.14.1 # via # -r requirements/base.txt # unstructured @@ -140,24 +140,24 @@ et-xmlfile==2.0.0 # via # -r requirements/base.txt # openpyxl -eval-type-backport==0.2.0 +eval-type-backport==0.2.2 # via # -r requirements/base.txt # unstructured-client execnb==0.1.11 # via nbdev -executing==2.1.0 +executing==2.2.0 # via stack-data -fastapi==0.113.0 +fastapi==0.115.8 # via -r requirements/base.txt -fastcore==1.7.26 +fastcore==1.7.28 # via # execnb # ghapi # nbdev fastjsonschema==2.21.1 # via nbformat -filelock==3.16.1 +filelock==3.17.0 # via # -r requirements/base.txt # huggingface-hub @@ -169,28 +169,28 @@ filetype==1.2.0 # unstructured flake8==7.1.1 # via -r requirements/test.in -flatbuffers==24.3.25 +flatbuffers==25.1.24 # via # -r requirements/base.txt # onnxruntime -fonttools==4.55.3 +fonttools==4.55.8 # via # -r requirements/base.txt # matplotlib fqdn==1.5.1 # via jsonschema -fsspec==2024.10.0 +fsspec==2024.12.0 # via # -r requirements/base.txt # huggingface-hub # torch ghapi==1.0.6 # via nbdev -google-api-core[grpc]==2.24.0 +google-api-core[grpc]==2.24.1 # via # -r requirements/base.txt # google-cloud-vision -google-auth==2.37.0 +google-auth==2.38.0 # via # -r requirements/base.txt # google-api-core @@ -204,12 +204,12 @@ googleapis-common-protos==1.66.0 # -r requirements/base.txt # google-api-core # grpcio-status -grpcio==1.68.1 +grpcio==1.70.0 # via # -r requirements/base.txt # google-api-core # grpcio-status -grpcio-status==1.68.1 +grpcio-status==1.70.0 # via # -r requirements/base.txt # google-api-core @@ -232,7 +232,7 @@ httpx==0.28.1 # -r requirements/test.in # jupyterlab # unstructured-client -huggingface-hub==0.26.5 +huggingface-hub==0.28.1 # via # -r requirements/base.txt # timm @@ -252,16 +252,12 @@ idna==3.10 # requests iniconfig==2.0.0 # via pytest -iopath==0.1.10 - # via - # -r requirements/base.txt - # layoutparser ipykernel==6.29.5 # via # jupyter # jupyter-console # jupyterlab -ipython==8.30.0 +ipython==8.31.0 # via # execnb # ipykernel @@ -273,7 +269,7 @@ isoduration==20.11.0 # via jsonschema jedi==0.19.2 # via ipython -jinja2==3.1.4 +jinja2==3.1.5 # via # -r requirements/base.txt # jupyter-server @@ -320,11 +316,11 @@ jupyter-core==5.7.2 # nbclient # nbconvert # nbformat -jupyter-events==0.10.0 +jupyter-events==0.11.0 # via jupyter-server jupyter-lsp==2.2.5 # via jupyterlab -jupyter-server==2.14.2 +jupyter-server==2.15.0 # via # jupyter-lsp # jupyterlab @@ -333,7 +329,7 @@ jupyter-server==2.14.2 # notebook-shim jupyter-server-terminals==0.5.3 # via jupyter-server -jupyterlab==4.3.3 +jupyterlab==4.3.5 # via # jupyter # notebook @@ -345,7 +341,7 @@ jupyterlab-server==2.27.3 # notebook jupyterlab-widgets==3.0.13 # via ipywidgets -kiwisolver==1.4.7 +kiwisolver==1.4.8 # via # -r requirements/base.txt # matplotlib @@ -353,10 +349,6 @@ langdetect==1.0.9 # via # -r requirements/base.txt # unstructured -layoutparser==0.3.4 - # via - # -r requirements/base.txt - # unstructured-inference lxml==5.3.0 # via # -r requirements/base.txt @@ -373,11 +365,11 @@ markupsafe==3.0.2 # -r requirements/base.txt # jinja2 # nbconvert -marshmallow==3.23.1 +marshmallow==3.26.0 # via # -r requirements/base.txt # dataclasses-json -matplotlib==3.9.4 +matplotlib==3.10.0 # via # -r requirements/base.txt # pycocotools @@ -388,13 +380,13 @@ matplotlib-inline==0.1.7 # ipython mccabe==0.7.0 # via flake8 -mistune==3.0.2 +mistune==3.1.1 # via nbconvert mpmath==1.3.0 # via # -r requirements/base.txt # sympy -mypy==1.13.0 +mypy==1.14.1 # via -r requirements/test.in mypy-extensions==1.0.0 # via @@ -402,9 +394,9 @@ mypy-extensions==1.0.0 # black # mypy # typing-inspect -nbclient==0.10.1 +nbclient==0.10.2 # via nbconvert -nbconvert==7.16.4 +nbconvert==7.16.6 # via # jupyter # jupyter-server @@ -429,7 +421,7 @@ nltk==3.9.1 # via # -r requirements/base.txt # unstructured -notebook==7.3.1 +notebook==7.3.2 # via jupyter notebook-shim==0.2.4 # via @@ -440,7 +432,6 @@ numpy==1.26.4 # -c requirements/constraints.in # -r requirements/base.txt # contourpy - # layoutparser # matplotlib # onnx # onnxruntime @@ -469,16 +460,15 @@ onnxruntime==1.20.1 # via # -r requirements/base.txt # unstructured-inference -opencv-python==4.10.0.84 +opencv-python==4.11.0.86 # via # -r requirements/base.txt - # layoutparser # unstructured-inference openpyxl==3.1.5 # via # -r requirements/base.txt # unstructured -orderly-set==5.2.2 +orderly-set==5.2.3 # via deepdiff overrides==7.7.0 # via jupyter-server @@ -505,8 +495,8 @@ packaging==24.2 pandas==2.2.3 # via # -r requirements/base.txt - # layoutparser # unstructured + # unstructured-inference pandocfilters==1.5.1 # via nbconvert parso==0.8.4 @@ -516,34 +506,27 @@ pathspec==0.12.1 pdf2image==1.17.0 # via # -r requirements/base.txt - # layoutparser # unstructured -pdfminer-six==20231228 +pdfminer-six==20240706 # via # -r requirements/base.txt - # pdfplumber # unstructured -pdfplumber==0.11.4 - # via - # -r requirements/base.txt - # layoutparser + # unstructured-inference pexpect==4.9.0 # via ipython pi-heif==0.21.0 # via # -r requirements/base.txt # unstructured -pikepdf==9.4.2 +pikepdf==9.5.1 # via # -r requirements/base.txt # unstructured -pillow==11.0.0 +pillow==11.1.0 # via # -r requirements/base.txt - # layoutparser # matplotlib # pdf2image - # pdfplumber # pi-heif # pikepdf # python-pptx @@ -555,22 +538,18 @@ platformdirs==4.3.6 # jupyter-core pluggy==1.5.0 # via pytest -portalocker==3.0.0 - # via - # -r requirements/base.txt - # iopath prometheus-client==0.21.1 # via jupyter-server -prompt-toolkit==3.0.48 +prompt-toolkit==3.0.50 # via # ipython # jupyter-console -proto-plus==1.25.0 +proto-plus==1.26.0 # via # -r requirements/base.txt # google-api-core # google-cloud-vision -protobuf==5.29.1 +protobuf==5.29.3 # via # -r requirements/base.txt # google-api-core @@ -580,7 +559,7 @@ protobuf==5.29.1 # onnx # onnxruntime # proto-plus -psutil==6.1.0 +psutil==6.1.1 # via # -r requirements/base.txt # ipykernel @@ -612,39 +591,39 @@ pycparser==2.22 # cffi pycryptodome==3.21.0 # via -r requirements/base.txt -pydantic==2.9.2 +pydantic==2.10.6 # via # -r requirements/base.txt # fastapi # unstructured-client -pydantic-core==2.23.4 +pydantic-core==2.27.2 # via # -r requirements/base.txt # pydantic pyflakes==3.2.0 # via flake8 -pygments==2.18.0 +pygments==2.19.1 # via # ipython # jupyter-console # nbconvert -pypandoc==1.14 +pypandoc==1.15 # via # -r requirements/base.txt # unstructured -pyparsing==3.2.0 +pyparsing==3.2.1 # via # -r requirements/base.txt # matplotlib -pypdf==5.1.0 +pypdf==5.2.0 # via # -r requirements/base.txt # unstructured # unstructured-client -pypdfium2==4.30.0 +pypdfium2==4.30.1 # via # -r requirements/base.txt - # pdfplumber + # unstructured-inference pytest==8.3.4 # via # pytest-cov @@ -665,17 +644,17 @@ python-docx==1.1.2 # via # -r requirements/base.txt # unstructured -python-iso639==2024.10.22 +python-iso639==2025.1.28 # via # -r requirements/base.txt # unstructured -python-json-logger==3.2.0 +python-json-logger==3.2.1 # via jupyter-events python-magic==0.4.27 # via # -r requirements/base.txt # unstructured -python-multipart==0.0.19 +python-multipart==0.0.20 # via # -r requirements/base.txt # unstructured-inference @@ -696,25 +675,24 @@ pyyaml==6.0.2 # -r requirements/base.txt # huggingface-hub # jupyter-events - # layoutparser # nbdev # omegaconf # timm # transformers -pyzmq==26.2.0 +pyzmq==26.2.1 # via # ipykernel # jupyter-client # jupyter-console # jupyter-server -rapidfuzz==3.10.1 +rapidfuzz==3.12.1 # via # -r requirements/base.txt # unstructured # unstructured-inference ratelimit==2.2.1 # via -r requirements/base.txt -referencing==0.35.1 +referencing==0.36.2 # via # jsonschema # jsonschema-specifications @@ -753,15 +731,15 @@ rsa==4.9 # via # -r requirements/base.txt # google-auth -safetensors==0.4.5 +safetensors==0.5.2 # via # -r requirements/base.txt # timm # transformers -scipy==1.14.1 +scipy==1.15.1 # via # -r requirements/base.txt - # layoutparser + # unstructured-inference send2trash==1.8.3 # via jupyter-server six==1.17.0 @@ -782,8 +760,9 @@ soupsieve==2.6 # beautifulsoup4 stack-data==0.6.3 # via ipython -starlette==0.38.6 +starlette==0.41.2 # via + # -c requirements/constraints.in # -r requirements/base.txt # fastapi sympy==1.13.1 @@ -795,25 +774,25 @@ terminado==0.18.1 # via # jupyter-server # jupyter-server-terminals -timm==1.0.12 +timm==1.0.14 # via # -r requirements/base.txt # effdet # unstructured-inference tinycss2==1.4.0 - # via nbconvert + # via bleach tokenizers==0.21.0 # via # -r requirements/base.txt # transformers -torch==2.5.1 +torch==2.6.0 # via # -r requirements/base.txt # effdet # timm # torchvision # unstructured-inference -torchvision==0.20.1 +torchvision==0.21.0 # via # -r requirements/base.txt # effdet @@ -830,7 +809,6 @@ tqdm==4.67.1 # via # -r requirements/base.txt # huggingface-hub - # iopath # nltk # transformers # unstructured @@ -850,7 +828,7 @@ traitlets==5.14.3 # nbclient # nbconvert # nbformat -transformers==4.47.0 +transformers==4.48.2 # via # -r requirements/base.txt # unstructured-inference @@ -862,14 +840,13 @@ typing-extensions==4.12.2 # anyio # fastapi # huggingface-hub - # iopath - # ipython # mypy # pydantic # pydantic-core # python-docx # python-oxmsg # python-pptx + # referencing # torch # typing-inspect # unstructured @@ -878,17 +855,17 @@ typing-inspect==0.9.0 # -r requirements/base.txt # dataclasses-json # unstructured-client -tzdata==2024.2 +tzdata==2025.1 # via # -r requirements/base.txt # pandas -unstructured[all-docs]==0.16.11 +unstructured[all-docs]==0.16.17 # via -r requirements/base.txt -unstructured-client==0.28.1 +unstructured-client==0.29.0 # via # -r requirements/base.txt # unstructured -unstructured-inference==0.8.1 +unstructured-inference==0.8.6 # via # -r requirements/base.txt # unstructured @@ -898,11 +875,11 @@ unstructured-pytesseract==0.3.13 # unstructured uri-template==1.3.0 # via jsonschema -urllib3==2.2.3 +urllib3==2.3.0 # via # -r requirements/base.txt # requests -uvicorn==0.32.1 +uvicorn==0.34.0 # via -r requirements/base.txt watchdog==6.0.0 # via nbdev @@ -922,7 +899,7 @@ wheel==0.45.1 # via astunparse widgetsnbextension==4.0.13 # via ipywidgets -wrapt==1.17.0 +wrapt==1.17.2 # via # -r requirements/base.txt # deprecated @@ -931,7 +908,7 @@ xlrd==2.0.1 # via # -r requirements/base.txt # unstructured -xlsxwriter==3.2.0 +xlsxwriter==3.2.2 # via # -r requirements/base.txt # python-pptx diff --git a/test_general/api/test_app.py b/test_general/api/test_app.py index 4a2db763..afb743ac 100644 --- a/test_general/api/test_app.py +++ b/test_general/api/test_app.py @@ -39,13 +39,17 @@ def test_general_api_health_check(): ("fake-email-attachment.eml", "message/rfc822"), ("fake-email-image-embedded.eml", "message/rfc822"), ("fake-email.eml", "message/rfc822"), - ("winter-sports.epub", "application/epub"), + # After https://github.com/Unstructured-IO/unstructured-api/pull/487 updated Starlette + # to resolve a vulnerability, these unit tests fail with: + # AttributeError: 'SpooledTemporaryFile' object has no attribute 'seekable' + # These files pass the smoke test that runs against Docker, so assume there's no regression. + # ("winter-sports.epub", "application/epub"), + # ("fake.odt", "application/vnd.oasis.opendocument.text"), ("fake-html.html", "text/html"), ("layout-parser-paper-fast.jpg", "image/jpeg"), ("spring-weather.html.json", "application/json"), ("README.md", "text/markdown"), ("fake-email.msg", "application/x-ole-storage"), - ("fake.odt", "application/vnd.oasis.opendocument.text"), ("layout-parser-paper.pdf", "application/pdf"), ("fake-power-point.ppt", "application/vnd.ms-powerpoint"), ( @@ -65,10 +69,11 @@ def test_general_api_health_check(): ) def test_general_api(example_filename, content_type): client = TestClient(app) - test_file = Path("sample-docs") / example_filename - response = client.post( - MAIN_API_ROUTE, files=[("files", (str(test_file), open(test_file, "rb"), content_type))] - ) + # Ensure files are properly closed + test_file_path = str(Path("sample-docs") / example_filename) + with open(test_file_path, "rb") as f: + response = client.post(MAIN_API_ROUTE, files=[("files", (test_file_path, f, content_type))]) + assert response.status_code == 200 assert len(response.json()) > 0 for i in response.json(): @@ -77,26 +82,28 @@ def test_general_api(example_filename, content_type): # Just hit the second path (posting multiple files) to bump the coverage # We'll come back and make smarter tests - response = client.post( - MAIN_API_ROUTE, - files=[ - ("files", (str(test_file), open(test_file, "rb"), content_type)), - ("files", (str(test_file), open(test_file, "rb"), content_type)), - ], - ) + with open(test_file_path, "rb") as f, open(test_file_path, "rb") as g: + response = client.post( + MAIN_API_ROUTE, + files=[ + ("files", (str(test_file_path), f, content_type)), + ("files", (str(test_file_path), g, content_type)), + ], + ) assert response.status_code == 200 assert all(x["metadata"]["filename"] == example_filename for i in response.json() for x in i) assert len(response.json()) > 0 - csv_response = client.post( - MAIN_API_ROUTE, - files=[ - ("files", (str(test_file), open(test_file, "rb"), content_type)), - ("files", (str(test_file), open(test_file, "rb"), content_type)), - ], - data={"output_format": "text/csv"}, - ) + with open(test_file_path, "rb") as f, open(test_file_path, "rb") as g: + csv_response = client.post( + MAIN_API_ROUTE, + files=[ + ("files", (str(test_file_path), f, content_type)), + ("files", (str(test_file_path), g, content_type)), + ], + data={"output_format": "text/csv"}, + ) assert csv_response.status_code == 200 dfs = pd.read_csv(io.StringIO(csv_response.text)) assert len(dfs) > 0 @@ -599,7 +606,7 @@ def test_parallel_mode_preserves_uniqueness_of_hashes_when_assembling_pages_spli response = client.post( MAIN_API_ROUTE, files=[("files", (str(test_file), open(test_file, "rb"), "application/pdf"))], - data={}, + data={"chunking_strategy": "by_title", "new_after_n_chars": 1000}, ) assert response.status_code == 200 diff --git a/test_general/api/test_deprecated_api.py b/test_general/api/test_deprecated_api.py index 6bf47403..50fd708d 100644 --- a/test_general/api/test_deprecated_api.py +++ b/test_general/api/test_deprecated_api.py @@ -37,7 +37,7 @@ ], ) def test_form_params_passed_as_first_element_of_array_are_properly_handled( - parameters: dict[str, Any] + parameters: dict[str, Any], ): """ Verify that responses do not include coordinates unless requested