diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index ba08a3c11..b1fd165d5 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -118,6 +118,10 @@ jobs:
       uses: actions/setup-python@v6
       with:
         python-version: ${{ env.PYTHON_VERSION }}
+    - name: Free up disk space
+      run: |
+        # Clear some space (https://github.com/actions/runner-images/issues/2840)
+        sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/share/boost
     - name: Test Dockerfile
       run: |
         python${{ env.PYTHON_VERSION }} -m venv .venv
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 362cc6048..3cbebab3b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,6 @@
+## 0.0.90
+* Upgrade version to pull in latest unstructured version and bump versions of dependencies.
+
 ## 0.0.89
 * Upgrade Pillow to 11.3.0 to address a CVE
 
diff --git a/Makefile b/Makefile
index 183c22d08..6ba7038a0 100644
--- a/Makefile
+++ b/Makefile
@@ -38,11 +38,38 @@ install-nltk-models:
 	python3 -c "from unstructured.nlp.tokenize import download_nltk_packages; download_nltk_packages()"
 
 ## pip-compile: compiles all base/dev/test requirements
+SHELL := /bin/bash
+# Every requirements/*.in input file, sorted for a deterministic compile order.
+BASE_REQUIREMENTS := $(sort $(wildcard ./requirements/*.in))
+# The .txt lockfile that corresponds to each .in input file.
+BASE_REQUIREMENTSTXT := $(patsubst %.in,%.txt,$(BASE_REQUIREMENTS))
+
 .PHONY: pip-compile
-pip-compile:
-	pip-compile --upgrade requirements/base.in
+pip-compile: compile-all-base
+
+## compile-test: compiles requirements/test.txt from base.txt plus test.in
+.PHONY: compile-test
+compile-test:
 	pip-compile --upgrade -o requirements/test.txt requirements/base.txt requirements/test.in
 
+## compile-base: compiles requirements/base.txt from base.in
+.PHONY: compile-base
+compile-base:
+	pip-compile --upgrade requirements/base.in
+
+## compile-all-base: compiles every requirements/*.in to its .txt lockfile
+# No compile-base/compile-test prerequisites: the loop below already rewrites
+# base.txt and test.txt, so running them first only repeated the resolve and
+# let compile-test race compile-base over base.txt under `make -j`.
+.PHONY: compile-all-base
+compile-all-base:
+	@$(foreach file,$(BASE_REQUIREMENTS),echo -e "\n\ncompiling: $(file)" && pip-compile --no-strip-extras --upgrade $(file) || exit;)
+
+## clean-requirements: removes the compiled .txt lockfiles (no error if already absent)
+.PHONY: clean-requirements
+clean-requirements:
+	$(RM) $(BASE_REQUIREMENTSTXT)
+
 .PHONY: install-pandoc
 install-pandoc:
 	ARCH=${ARCH} ./scripts/install-pandoc.sh
diff --git a/prepline_general/api/__version__.py 
b/prepline_general/api/__version__.py index e2f986fa3..8a4d4ffeb 100644 --- a/prepline_general/api/__version__.py +++ b/prepline_general/api/__version__.py @@ -1 +1 @@ -__version__ = "0.0.89" # pragma: no cover +__version__ = "0.0.90" # pragma: no cover diff --git a/preprocessing-pipeline-family.yaml b/preprocessing-pipeline-family.yaml index 28fc06cbc..6a5492b15 100644 --- a/preprocessing-pipeline-family.yaml +++ b/preprocessing-pipeline-family.yaml @@ -1,2 +1,2 @@ name: general -version: 0.0.89 +version: 0.0.90 diff --git a/requirements/base.txt b/requirements/base.txt index e67d257b5..547449170 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -2,17 +2,19 @@ # This file is autogenerated by pip-compile with Python 3.12 # by the following command: # -# pip-compile requirements/base.in +# pip-compile --no-strip-extras ./requirements/base.in # -accelerate==1.7.0 +accelerate==1.11.0 # via unstructured-inference -aiofiles==24.1.0 +aiofiles==25.1.0 # via unstructured-client +annotated-doc==0.0.3 + # via fastapi annotated-types==0.7.0 # via pydantic antlr4-python3-runtime==4.9.3 # via omegaconf -anyio==4.9.0 +anyio==4.11.0 # via # httpx # starlette @@ -20,24 +22,23 @@ backoff==2.2.1 # via # -r requirements/base.in # unstructured -beautifulsoup4==4.13.4 +beautifulsoup4==4.14.2 # via unstructured -cachetools==5.5.2 +cachetools==6.2.1 # via google-auth -certifi==2025.4.26 +certifi==2025.10.5 # via # httpcore # httpx # requests -cffi==1.17.1 +cffi==2.0.0 # via cryptography -chardet==5.2.0 - # via unstructured -charset-normalizer==3.4.2 +charset-normalizer==3.4.4 # via # pdfminer-six # requests -click==8.2.1 + # unstructured +click==8.3.0 # via # -r requirements/base.in # nltk @@ -45,72 +46,76 @@ click==8.2.1 # uvicorn coloredlogs==15.0.1 # via onnxruntime -contourpy==1.3.2 +contourpy==1.3.3 # via matplotlib -cryptography==45.0.4 +cryptography==46.0.3 # via + # msoffcrypto-tool # pdfminer-six # unstructured-client cycler==0.12.1 # via matplotlib 
dataclasses-json==0.6.7 # via unstructured -deprecated==1.2.18 +deprecated==1.3.1 # via pikepdf effdet==0.4.1 # via unstructured -emoji==2.14.1 +emoji==2.15.0 # via unstructured et-xmlfile==2.0.0 # via openpyxl -fastapi==0.115.12 +fastapi==0.121.0 # via -r requirements/base.in -filelock==3.18.0 +filelock==3.20.0 # via # huggingface-hub # torch # transformers filetype==1.2.0 # via unstructured -flatbuffers==25.2.10 +flatbuffers==25.9.23 # via onnxruntime -fonttools==4.58.2 +fonttools==4.60.1 # via matplotlib -fsspec==2025.5.1 +fsspec==2025.10.0 # via # huggingface-hub # torch -google-api-core[grpc]==2.25.1 +google-api-core[grpc]==2.28.1 # via google-cloud-vision -google-auth==2.40.3 +google-auth==2.43.0 # via # google-api-core # google-cloud-vision -google-cloud-vision==3.10.2 +google-cloud-vision==3.11.0 # via unstructured -googleapis-common-protos==1.70.0 +googleapis-common-protos==1.72.0 # via # google-api-core # grpcio-status -grpcio==1.73.0 +grpcio==1.76.0 # via # google-api-core + # google-cloud-vision # grpcio-status -grpcio-status==1.73.0 +grpcio-status==1.76.0 # via google-api-core h11==0.16.0 # via # httpcore # uvicorn -hf-xet==1.1.3 +hf-xet==1.2.0 # via huggingface-hub html5lib==1.1 # via unstructured httpcore==1.0.9 - # via httpx + # via + # httpx + # unstructured-client httpx==0.28.1 # via unstructured-client -huggingface-hub==0.33.0 +huggingface-hub==0.36.0 # via # accelerate # timm @@ -119,51 +124,54 @@ huggingface-hub==0.33.0 # unstructured-inference humanfriendly==10.0 # via coloredlogs -idna==3.10 +idna==3.11 # via # anyio # httpx # requests jinja2==3.1.6 # via torch -joblib==1.5.1 +joblib==1.5.2 # via nltk -kiwisolver==1.4.8 +kiwisolver==1.4.9 # via matplotlib langdetect==1.0.9 # via unstructured -lxml==5.4.0 +lxml==6.0.2 # via # pikepdf # python-docx # python-pptx # unstructured -markdown==3.8 +markdown==3.10 # via unstructured -markupsafe==3.0.2 +markupsafe==3.0.3 # via jinja2 marshmallow==3.26.1 # via dataclasses-json -matplotlib==3.10.3 
+matplotlib==3.10.7 # via unstructured-inference +ml-dtypes==0.5.3 + # via onnx mpmath==1.3.0 # via sympy +msoffcrypto-tool==5.4.2 + # via unstructured mypy-extensions==1.1.0 # via typing-inspect -nest-asyncio==1.6.0 - # via unstructured-client networkx==3.5 # via # torch # unstructured -nltk==3.9.1 +nltk==3.9.2 # via unstructured numpy==1.26.4 # via - # -c ./requirements/constraints.in + # -c requirements/constraints.in # accelerate # contourpy # matplotlib + # ml-dtypes # onnx # onnxruntime # opencv-python @@ -175,14 +183,16 @@ numpy==1.26.4 # unstructured # unstructured-inference olefile==0.47 - # via python-oxmsg + # via + # msoffcrypto-tool + # python-oxmsg omegaconf==2.3.0 # via effdet -onnx==1.18.0 +onnx==1.19.1 # via # unstructured # unstructured-inference -onnxruntime==1.22.0 +onnxruntime==1.23.2 # via # unstructured # unstructured-inference @@ -200,7 +210,7 @@ packaging==25.0 # pikepdf # transformers # unstructured-pytesseract -pandas==2.3.0 +pandas==2.3.3 # via # unstructured # unstructured-inference @@ -210,11 +220,11 @@ pdfminer-six==20250506 # via # unstructured # unstructured-inference -pi-heif==0.22.0 +pi-heif==1.1.1 # via unstructured -pikepdf==9.8.1 +pikepdf==10.0.0 # via unstructured -pillow==11.3.0 +pillow==12.0.0 # via # matplotlib # pdf2image @@ -227,7 +237,7 @@ proto-plus==1.26.1 # via # google-api-core # google-cloud-vision -protobuf==6.31.1 +protobuf==6.33.0 # via # google-api-core # google-cloud-vision @@ -236,7 +246,7 @@ protobuf==6.31.1 # onnx # onnxruntime # proto-plus -psutil==7.0.0 +psutil==7.1.3 # via # -r requirements/base.in # accelerate @@ -249,32 +259,32 @@ pyasn1-modules==0.4.2 # via google-auth pycocotools==2.0.10 # via effdet -pycparser==2.22 +pycparser==2.23 # via cffi pycryptodome==3.23.0 # via -r requirements/base.in -pydantic==2.11.5 +pydantic==2.12.4 # via # fastapi # unstructured-client -pydantic-core==2.33.2 +pydantic-core==2.41.5 # via pydantic pypandoc==1.15 # via unstructured -pyparsing==3.2.3 +pyparsing==3.2.5 # via 
matplotlib -pypdf==5.6.0 +pypdf==6.1.3 # via # -r requirements/base.in # unstructured # unstructured-client -pypdfium2==4.30.1 +pypdfium2==5.0.0 # via unstructured-inference python-dateutil==2.9.0.post0 # via # matplotlib # pandas -python-docx==1.1.2 +python-docx==1.2.0 # via unstructured python-iso639==2025.2.18 # via unstructured @@ -288,24 +298,24 @@ python-pptx==1.0.2 # via unstructured pytz==2025.2 # via pandas -pyyaml==6.0.2 +pyyaml==6.0.3 # via # accelerate # huggingface-hub # omegaconf # timm # transformers -rapidfuzz==3.13.0 +rapidfuzz==3.14.3 # via # unstructured # unstructured-inference ratelimit==2.2.1 # via -r requirements/base.in -regex==2024.11.6 +regex==2025.11.3 # via # nltk # transformers -requests==2.32.4 +requests==2.32.5 # via # -r requirements/base.in # google-api-core @@ -317,12 +327,12 @@ requests-toolbelt==1.0.0 # via unstructured-client rsa==4.9.1 # via google-auth -safetensors==0.5.3 +safetensors==0.6.2 # via # accelerate # timm # transformers -scipy==1.15.3 +scipy==1.16.3 # via unstructured-inference six==1.17.0 # via @@ -331,30 +341,30 @@ six==1.17.0 # python-dateutil sniffio==1.3.1 # via anyio -soupsieve==2.7 +soupsieve==2.8 # via beautifulsoup4 starlette==0.41.2 # via - # -c ./requirements/constraints.in + # -c requirements/constraints.in # fastapi sympy==1.14.0 # via # onnxruntime # torch -timm==1.0.15 +timm==1.0.22 # via # effdet # unstructured-inference -tokenizers==0.21.1 +tokenizers==0.22.1 # via transformers -torch==2.7.1 +torch==2.9.0 # via # accelerate # effdet # timm # torchvision # unstructured-inference -torchvision==0.22.1 +torchvision==0.24.0 # via # effdet # timm @@ -364,13 +374,14 @@ tqdm==4.67.1 # nltk # transformers # unstructured -transformers==4.52.4 +transformers==4.57.1 # via unstructured-inference -typing-extensions==4.14.0 +typing-extensions==4.15.0 # via # anyio # beautifulsoup4 # fastapi + # grpcio # huggingface-hub # onnx # pydantic @@ -384,31 +395,31 @@ typing-extensions==4.14.0 # unstructured 
typing-inspect==0.9.0 # via dataclasses-json -typing-inspection==0.4.1 +typing-inspection==0.4.2 # via pydantic tzdata==2025.2 # via pandas -unstructured[all-docs]==0.17.2 +unstructured[all-docs]==0.18.18 # via -r requirements/base.in -unstructured-client==0.36.0 +unstructured-client==0.42.3 # via unstructured -unstructured-inference==1.0.5 +unstructured-inference==1.1.1 # via unstructured unstructured-pytesseract==0.3.15 # via unstructured -urllib3==2.4.0 +urllib3==2.5.0 # via requests -uvicorn==0.34.3 +uvicorn==0.38.0 # via -r requirements/base.in webencodings==0.5.1 # via html5lib -wrapt==1.17.2 +wrapt==2.0.1 # via # deprecated # unstructured -xlrd==2.0.1 +xlrd==2.0.2 # via unstructured -xlsxwriter==3.2.3 +xlsxwriter==3.2.9 # via python-pptx # The following packages are considered to be unsafe in a requirements file: diff --git a/requirements/constraints.txt b/requirements/constraints.txt new file mode 100644 index 000000000..c1b2a25a3 --- /dev/null +++ b/requirements/constraints.txt @@ -0,0 +1,18 @@ +# +# This file is autogenerated by pip-compile with Python 3.12 +# by the following command: +# +# pip-compile --no-strip-extras ./requirements/constraints.in +# +anyio==4.11.0 + # via starlette +idna==3.11 + # via anyio +numpy==1.26.4 + # via -r requirements/constraints.in +sniffio==1.3.1 + # via anyio +starlette==0.41.2 + # via -r requirements/constraints.in +typing-extensions==4.15.0 + # via anyio diff --git a/requirements/test.txt b/requirements/test.txt index ca1e5b7c4..1f92115a4 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -2,37 +2,19 @@ # This file is autogenerated by pip-compile with Python 3.12 # by the following command: # -# pip-compile --output-file=requirements/test.txt requirements/base.txt requirements/test.in +# pip-compile --no-strip-extras ./requirements/test.in # -accelerate==1.7.0 +anyio==4.11.0 # via - # -r requirements/base.txt - # unstructured-inference -aiofiles==24.1.0 - # via - # -r requirements/base.txt - # 
unstructured-client -annotated-types==0.7.0 - # via - # -r requirements/base.txt - # pydantic -antlr4-python3-runtime==4.9.3 - # via - # -r requirements/base.txt - # omegaconf -anyio==4.9.0 - # via - # -r requirements/base.txt # httpx # jupyter-server - # starlette appnope==0.1.4 # via ipykernel argon2-cffi==25.1.0 # via jupyter-server -argon2-cffi-bindings==21.2.0 +argon2-cffi-bindings==25.1.0 # via argon2-cffi -arrow==1.3.0 +arrow==1.4.0 # via isoduration asttokens==3.0.0 # via @@ -42,229 +24,88 @@ astunparse==1.6.3 # via nbdev async-lru==2.0.5 # via jupyterlab -attrs==25.3.0 +attrs==25.4.0 # via # jsonschema # referencing babel==2.17.0 # via jupyterlab-server -backoff==2.2.1 - # via - # -r requirements/base.txt - # unstructured -beautifulsoup4==4.13.4 - # via - # -r requirements/base.txt - # nbconvert - # unstructured -black==25.1.0 +beautifulsoup4==4.14.2 + # via nbconvert +black==25.9.0 # via -r requirements/test.in -bleach[css]==6.2.0 +bleach[css]==6.3.0 # via nbconvert -cachetools==5.5.2 - # via - # -r requirements/base.txt - # google-auth -certifi==2025.4.26 +build==1.3.0 + # via nbdev +certifi==2025.10.5 # via - # -r requirements/base.txt # httpcore # httpx # requests -cffi==1.17.1 - # via - # -r requirements/base.txt - # argon2-cffi-bindings - # cryptography -chardet==5.2.0 - # via - # -r requirements/base.txt - # unstructured -charset-normalizer==3.4.2 - # via - # -r requirements/base.txt - # pdfminer-six - # requests -click==8.2.1 +cffi==2.0.0 + # via argon2-cffi-bindings +charset-normalizer==3.4.4 + # via requests +click==8.3.0 # via - # -r requirements/base.txt # -r requirements/test.in # black - # nltk - # python-oxmsg - # uvicorn -coloredlogs==15.0.1 - # via - # -r requirements/base.txt - # onnxruntime -comm==0.2.2 +comm==0.2.3 # via # ipykernel # ipywidgets -contourpy==1.3.2 - # via - # -r requirements/base.txt - # matplotlib -coverage[toml]==7.9.0 +coverage[toml]==7.11.1 # via pytest-cov -cryptography==45.0.4 - # via - # -r requirements/base.txt - 
# pdfminer-six - # unstructured-client -cycler==0.12.1 - # via - # -r requirements/base.txt - # matplotlib -dataclasses-json==0.6.7 - # via - # -r requirements/base.txt - # unstructured -debugpy==1.8.14 +debugpy==1.8.17 # via ipykernel decorator==5.2.1 # via ipython -deepdiff==8.5.0 +deepdiff==8.6.1 # via -r requirements/test.in defusedxml==0.7.1 # via nbconvert -deprecated==1.2.18 - # via - # -r requirements/base.txt - # pikepdf -effdet==0.4.1 - # via - # -r requirements/base.txt - # unstructured -emoji==2.14.1 - # via - # -r requirements/base.txt - # unstructured -et-xmlfile==2.0.0 - # via - # -r requirements/base.txt - # openpyxl -execnb==0.1.14 +execnb==0.1.15 # via nbdev execnet==2.1.1 # via pytest-xdist -executing==2.2.0 +executing==2.2.1 # via stack-data -fastapi==0.115.12 - # via -r requirements/base.txt -fastcore==1.8.2 +fastcore==1.8.15 # via # execnb # ghapi # nbdev -fastjsonschema==2.21.1 +fastjsonschema==2.21.2 # via nbformat -filelock==3.18.0 - # via - # -r requirements/base.txt - # huggingface-hub - # torch - # transformers -filetype==1.2.0 - # via - # -r requirements/base.txt - # unstructured -flake8==7.2.0 +flake8==7.3.0 # via -r requirements/test.in -flatbuffers==25.2.10 - # via - # -r requirements/base.txt - # onnxruntime -fonttools==4.58.2 - # via - # -r requirements/base.txt - # matplotlib fqdn==1.5.1 # via jsonschema -fsspec==2025.5.1 - # via - # -r requirements/base.txt - # huggingface-hub - # torch -ghapi==1.0.6 +ghapi==1.0.8 # via nbdev -google-api-core[grpc]==2.25.1 - # via - # -r requirements/base.txt - # google-cloud-vision -google-auth==2.40.3 - # via - # -r requirements/base.txt - # google-api-core - # google-cloud-vision -google-cloud-vision==3.10.2 - # via - # -r requirements/base.txt - # unstructured -googleapis-common-protos==1.70.0 - # via - # -r requirements/base.txt - # google-api-core - # grpcio-status -grpcio==1.73.0 - # via - # -r requirements/base.txt - # google-api-core - # grpcio-status -grpcio-status==1.73.0 - # via - # 
-r requirements/base.txt - # google-api-core h11==0.16.0 - # via - # -r requirements/base.txt - # httpcore - # uvicorn -hf-xet==1.1.3 - # via - # -r requirements/base.txt - # huggingface-hub -html5lib==1.1 - # via - # -r requirements/base.txt - # unstructured + # via httpcore httpcore==1.0.9 - # via - # -r requirements/base.txt - # httpx + # via httpx httpx==0.28.1 # via - # -r requirements/base.txt # -r requirements/test.in # jupyterlab - # unstructured-client -huggingface-hub==0.33.0 - # via - # -r requirements/base.txt - # accelerate - # timm - # tokenizers - # transformers - # unstructured-inference -humanfriendly==10.0 - # via - # -r requirements/base.txt - # coloredlogs -idna==3.10 - # via - # -r requirements/base.txt +idna==3.11 + # via # anyio # httpx # jsonschema # requests -iniconfig==2.1.0 +iniconfig==2.3.0 # via pytest -ipykernel==6.29.5 +ipykernel==7.1.0 # via # jupyter # jupyter-console # jupyterlab -ipython==9.3.0 +ipython==9.7.0 # via # execnb # ipykernel @@ -272,7 +113,7 @@ ipython==9.3.0 # jupyter-console ipython-pygments-lexers==1.1.1 # via ipython -ipywidgets==8.1.7 +ipywidgets==8.1.8 # via jupyter isoduration==20.11.0 # via jsonschema @@ -280,26 +121,20 @@ jedi==0.19.2 # via ipython jinja2==3.1.6 # via - # -r requirements/base.txt # jupyter-server # jupyterlab # jupyterlab-server # nbconvert - # torch -joblib==1.5.1 - # via - # -r requirements/base.txt - # nltk -json5==0.12.0 +json5==0.12.1 # via jupyterlab-server jsonpointer==3.0.0 # via jsonschema -jsonschema[format-nongpl]==4.24.0 +jsonschema[format-nongpl]==4.25.1 # via # jupyter-events # jupyterlab-server # nbformat -jsonschema-specifications==2025.4.1 +jsonschema-specifications==2025.9.1 # via jsonschema jupyter==1.1.1 # via -r requirements/test.in @@ -311,7 +146,7 @@ jupyter-client==8.6.3 # nbclient jupyter-console==6.6.3 # via jupyter -jupyter-core==5.8.1 +jupyter-core==5.9.1 # via # ipykernel # jupyter-client @@ -323,9 +158,9 @@ jupyter-core==5.8.1 # nbformat jupyter-events==0.12.0 # 
via jupyter-server -jupyter-lsp==2.2.5 +jupyter-lsp==2.3.0 # via jupyterlab -jupyter-server==2.16.0 +jupyter-server==2.17.0 # via # jupyter-lsp # jupyterlab @@ -334,77 +169,45 @@ jupyter-server==2.16.0 # notebook-shim jupyter-server-terminals==0.5.3 # via jupyter-server -jupyterlab==4.4.3 +jupyterlab==4.4.10 # via # jupyter # notebook jupyterlab-pygments==0.3.0 # via nbconvert -jupyterlab-server==2.27.3 +jupyterlab-server==2.28.0 # via # jupyterlab # notebook -jupyterlab-widgets==3.0.15 +jupyterlab-widgets==3.0.16 # via ipywidgets -kiwisolver==1.4.8 - # via - # -r requirements/base.txt - # matplotlib -langdetect==1.0.9 - # via - # -r requirements/base.txt - # unstructured -lxml==5.4.0 - # via - # -r requirements/base.txt - # pikepdf - # python-docx - # python-pptx - # unstructured -markdown==3.8 +lark==1.3.1 + # via rfc3987-syntax +markupsafe==3.0.3 # via - # -r requirements/base.txt - # unstructured -markupsafe==3.0.2 - # via - # -r requirements/base.txt # jinja2 # nbconvert -marshmallow==3.26.1 - # via - # -r requirements/base.txt - # dataclasses-json -matplotlib==3.10.3 - # via - # -r requirements/base.txt - # unstructured-inference -matplotlib-inline==0.1.7 +matplotlib-inline==0.2.1 # via # ipykernel # ipython mccabe==0.7.0 # via flake8 -mistune==3.1.3 +mistune==3.1.4 # via nbconvert -mpmath==1.3.0 - # via - # -r requirements/base.txt - # sympy -mypy==1.16.0 +mypy==1.18.2 # via -r requirements/test.in mypy-extensions==1.1.0 # via - # -r requirements/base.txt # black # mypy - # typing-inspect nbclient==0.10.2 # via nbconvert nbconvert==7.16.6 # via # jupyter # jupyter-server -nbdev==2.4.2 +nbdev==2.4.6 # via -r requirements/test.in nbformat==5.10.4 # via @@ -412,137 +215,40 @@ nbformat==5.10.4 # nbclient # nbconvert nest-asyncio==1.6.0 - # via - # -r requirements/base.txt - # ipykernel - # unstructured-client -networkx==3.5 - # via - # -r requirements/base.txt - # torch - # unstructured -nltk==3.9.1 - # via - # -r requirements/base.txt - # unstructured 
-notebook==7.4.3 + # via ipykernel +notebook==7.4.7 # via jupyter notebook-shim==0.2.4 # via # jupyterlab # notebook -numpy==1.26.4 - # via - # -c ./requirements/constraints.in - # -r requirements/base.txt - # accelerate - # contourpy - # matplotlib - # onnx - # onnxruntime - # opencv-python - # pandas - # pycocotools - # scipy - # torchvision - # transformers - # unstructured - # unstructured-inference -olefile==0.47 - # via - # -r requirements/base.txt - # python-oxmsg -omegaconf==2.3.0 - # via - # -r requirements/base.txt - # effdet -onnx==1.18.0 - # via - # -r requirements/base.txt - # unstructured - # unstructured-inference -onnxruntime==1.22.0 - # via - # -r requirements/base.txt - # unstructured - # unstructured-inference -opencv-python==4.11.0.86 - # via - # -r requirements/base.txt - # unstructured-inference -openpyxl==3.1.5 - # via - # -r requirements/base.txt - # unstructured -orderly-set==5.4.1 +orderly-set==5.5.0 # via deepdiff -overrides==7.7.0 - # via jupyter-server packaging==25.0 # via - # -r requirements/base.txt - # accelerate # black + # build # fastcore # ghapi - # huggingface-hub # ipykernel # jupyter-events # jupyter-server # jupyterlab # jupyterlab-server - # marshmallow - # matplotlib # nbconvert # nbdev - # onnxruntime - # pikepdf # pytest - # transformers - # unstructured-pytesseract -pandas==2.3.0 - # via - # -r requirements/base.txt - # unstructured - # unstructured-inference pandocfilters==1.5.1 # via nbconvert -parso==0.8.4 +parso==0.8.5 # via jedi pathspec==0.12.1 # via # black # mypy -pdf2image==1.17.0 - # via - # -r requirements/base.txt - # unstructured -pdfminer-six==20250506 - # via - # -r requirements/base.txt - # unstructured - # unstructured-inference pexpect==4.9.0 # via ipython -pi-heif==0.22.0 - # via - # -r requirements/base.txt - # unstructured -pikepdf==9.8.1 - # via - # -r requirements/base.txt - # unstructured -pillow==11.3.0 - # via - # -r requirements/base.txt - # matplotlib - # pdf2image - # pi-heif - # pikepdf - # 
python-pptx - # torchvision - # unstructured-pytesseract -platformdirs==4.3.8 +platformdirs==4.5.0 # via # black # jupyter-core @@ -550,189 +256,71 @@ pluggy==1.6.0 # via # pytest # pytest-cov -prometheus-client==0.22.1 +prometheus-client==0.23.1 # via jupyter-server -prompt-toolkit==3.0.51 +prompt-toolkit==3.0.52 # via # ipython # jupyter-console -proto-plus==1.26.1 - # via - # -r requirements/base.txt - # google-api-core - # google-cloud-vision -protobuf==6.31.1 - # via - # -r requirements/base.txt - # google-api-core - # google-cloud-vision - # googleapis-common-protos - # grpcio-status - # onnx - # onnxruntime - # proto-plus -psutil==7.0.0 - # via - # -r requirements/base.txt - # accelerate - # ipykernel - # unstructured +psutil==7.1.3 + # via ipykernel ptyprocess==0.7.0 # via # pexpect # terminado pure-eval==0.2.3 # via stack-data -pyasn1==0.6.1 - # via - # -r requirements/base.txt - # pyasn1-modules - # rsa -pyasn1-modules==0.4.2 - # via - # -r requirements/base.txt - # google-auth -pycocotools==2.0.10 - # via - # -r requirements/base.txt - # effdet -pycodestyle==2.13.0 +pycodestyle==2.14.0 # via flake8 -pycparser==2.22 - # via - # -r requirements/base.txt - # cffi -pycryptodome==3.23.0 - # via -r requirements/base.txt -pydantic==2.11.5 - # via - # -r requirements/base.txt - # fastapi - # unstructured-client -pydantic-core==2.33.2 - # via - # -r requirements/base.txt - # pydantic -pyflakes==3.3.2 +pycparser==2.23 + # via cffi +pyflakes==3.4.0 # via flake8 -pygments==2.19.1 +pygments==2.19.2 # via # ipython # ipython-pygments-lexers # jupyter-console # nbconvert # pytest -pypandoc==1.15 - # via - # -r requirements/base.txt - # unstructured -pyparsing==3.2.3 - # via - # -r requirements/base.txt - # matplotlib -pypdf==5.6.0 - # via - # -r requirements/base.txt - # unstructured - # unstructured-client -pypdfium2==4.30.1 - # via - # -r requirements/base.txt - # unstructured-inference -pytest==8.4.0 +pyproject-hooks==1.2.0 + # via build +pytest==8.4.2 # via # 
pytest-cov # pytest-mock # pytest-xdist -pytest-cov==6.2.1 +pytest-cov==7.0.0 # via -r requirements/test.in -pytest-mock==3.14.1 +pytest-mock==3.15.1 # via -r requirements/test.in -pytest-xdist==3.7.0 +pytest-xdist==3.8.0 # via -r requirements/test.in python-dateutil==2.9.0.post0 # via - # -r requirements/base.txt # arrow # jupyter-client - # matplotlib - # pandas -python-docx==1.1.2 - # via - # -r requirements/base.txt - # unstructured -python-iso639==2025.2.18 - # via - # -r requirements/base.txt - # unstructured -python-json-logger==3.3.0 +python-json-logger==4.0.0 # via jupyter-events -python-magic==0.4.27 - # via - # -r requirements/base.txt - # unstructured -python-multipart==0.0.20 - # via - # -r requirements/base.txt - # unstructured-inference -python-oxmsg==0.0.2 - # via - # -r requirements/base.txt - # unstructured -python-pptx==1.0.2 +pytokens==0.3.0 + # via black +pyyaml==6.0.3 # via - # -r requirements/base.txt - # unstructured -pytz==2025.2 - # via - # -r requirements/base.txt - # pandas -pyyaml==6.0.2 - # via - # -r requirements/base.txt - # accelerate - # huggingface-hub # jupyter-events # nbdev - # omegaconf - # timm - # transformers -pyzmq==26.4.0 +pyzmq==27.1.0 # via # ipykernel # jupyter-client # jupyter-console # jupyter-server -rapidfuzz==3.13.0 - # via - # -r requirements/base.txt - # unstructured - # unstructured-inference -ratelimit==2.2.1 - # via -r requirements/base.txt -referencing==0.36.2 +referencing==0.37.0 # via # jsonschema # jsonschema-specifications # jupyter-events -regex==2024.11.6 - # via - # -r requirements/base.txt - # nltk - # transformers -requests==2.32.4 - # via - # -r requirements/base.txt - # google-api-core - # huggingface-hub - # jupyterlab-server - # requests-toolbelt - # transformers - # unstructured -requests-toolbelt==1.0.0 - # via - # -r requirements/base.txt - # unstructured-client +requests==2.32.5 + # via jupyterlab-server rfc3339-validator==0.1.4 # via # jsonschema @@ -741,83 +329,32 @@ 
rfc3986-validator==0.1.1 # via # jsonschema # jupyter-events -rpds-py==0.25.1 +rfc3987-syntax==1.1.0 + # via jsonschema +rpds-py==0.28.0 # via # jsonschema # referencing -rsa==4.9.1 - # via - # -r requirements/base.txt - # google-auth -safetensors==0.5.3 - # via - # -r requirements/base.txt - # accelerate - # timm - # transformers -scipy==1.15.3 - # via - # -r requirements/base.txt - # unstructured-inference send2trash==1.8.3 # via jupyter-server six==1.17.0 # via - # -r requirements/base.txt # astunparse - # html5lib - # langdetect # python-dateutil # rfc3339-validator sniffio==1.3.1 - # via - # -r requirements/base.txt - # anyio -soupsieve==2.7 - # via - # -r requirements/base.txt - # beautifulsoup4 + # via anyio +soupsieve==2.8 + # via beautifulsoup4 stack-data==0.6.3 # via ipython -starlette==0.41.2 - # via - # -c ./requirements/constraints.in - # -r requirements/base.txt - # fastapi -sympy==1.14.0 - # via - # -r requirements/base.txt - # onnxruntime - # torch terminado==0.18.1 # via # jupyter-server # jupyter-server-terminals -timm==1.0.15 - # via - # -r requirements/base.txt - # effdet - # unstructured-inference tinycss2==1.4.0 # via bleach -tokenizers==0.21.1 - # via - # -r requirements/base.txt - # transformers -torch==2.7.1 - # via - # -r requirements/base.txt - # accelerate - # effdet - # timm - # torchvision - # unstructured-inference -torchvision==0.22.1 - # via - # -r requirements/base.txt - # effdet - # timm -tornado==6.5.1 +tornado==6.5.2 # via # ipykernel # jupyter-client @@ -825,16 +362,8 @@ tornado==6.5.1 # jupyterlab # notebook # terminado -tqdm==4.67.1 - # via - # -r requirements/base.txt - # huggingface-hub - # nltk - # transformers - # unstructured traitlets==5.14.3 # via - # comm # ipykernel # ipython # ipywidgets @@ -848,96 +377,34 @@ traitlets==5.14.3 # nbclient # nbconvert # nbformat -transformers==4.52.4 - # via - # -r requirements/base.txt - # unstructured-inference -types-python-dateutil==2.9.0.20250516 - # via arrow 
-typing-extensions==4.14.0 +typing-extensions==4.15.0 # via - # -r requirements/base.txt # anyio # beautifulsoup4 - # fastapi - # huggingface-hub # mypy - # onnx - # pydantic - # pydantic-core - # python-docx - # python-oxmsg - # python-pptx # referencing - # torch - # typing-inspect - # typing-inspection - # unstructured -typing-inspect==0.9.0 - # via - # -r requirements/base.txt - # dataclasses-json -typing-inspection==0.4.1 - # via - # -r requirements/base.txt - # pydantic tzdata==2025.2 - # via - # -r requirements/base.txt - # pandas -unstructured[all-docs]==0.17.2 - # via -r requirements/base.txt -unstructured-client==0.36.0 - # via - # -r requirements/base.txt - # unstructured -unstructured-inference==1.0.5 - # via - # -r requirements/base.txt - # unstructured -unstructured-pytesseract==0.3.15 - # via - # -r requirements/base.txt - # unstructured + # via arrow uri-template==1.3.0 # via jsonschema -urllib3==2.4.0 - # via - # -r requirements/base.txt - # requests -uvicorn==0.34.3 - # via -r requirements/base.txt +urllib3==2.5.0 + # via requests watchdog==6.0.0 # via nbdev -wcwidth==0.2.13 +wcwidth==0.2.14 # via prompt-toolkit -webcolors==24.11.1 +webcolors==25.10.0 # via jsonschema webencodings==0.5.1 # via - # -r requirements/base.txt # bleach - # html5lib # tinycss2 -websocket-client==1.8.0 +websocket-client==1.9.0 # via jupyter-server wheel==0.45.1 # via astunparse -widgetsnbextension==4.0.14 +widgetsnbextension==4.0.15 # via ipywidgets -wrapt==1.17.2 - # via - # -r requirements/base.txt - # deprecated - # unstructured -xlrd==2.0.1 - # via - # -r requirements/base.txt - # unstructured -xlsxwriter==3.2.3 - # via - # -r requirements/base.txt - # python-pptx # The following packages are considered to be unsafe in a requirements file: # setuptools diff --git a/scripts/docker-smoke-test.sh b/scripts/docker-smoke-test.sh index 808c88052..6dcfc242e 100755 --- a/scripts/docker-smoke-test.sh +++ b/scripts/docker-smoke-test.sh @@ -7,7 +7,7 @@ # Is there a good way to 
reuse code here? # Also note this can evolve into a generalized pipeline smoke test -# shellcheck disable=SC2317 # Shellcheck complains that trap functions are unreachable... +# shellcheck disable=SC2317,SC2329 # Shellcheck complains that trap functions are unreachable/unused... set -e