Skip to content

Commit 2c81930

Browse files
authored
feat: Return a 503 if available memory is low (#203)
Add `UNSTRUCTURED_MEMORY_FREE_MINIMUM_MB`. When this is set and the host's available memory is below this value, we'll start returning a 503. The exception is if the request originates from `10.4.x.x` or `10.5.x.x`. These are parallel mode requests for a doc already in progress (hosted API). Also, move the `safetensors` pin to a new `constraints.in` file.

To test, set the variable very high and confirm that you get a 503:

```
export UNSTRUCTURED_MEMORY_FREE_MINIMUM_MB=30000
make run-web-app
curl 'http://localhost:8000/general/v0/general' --form files=@sample-docs/layout-parser-paper-fast.pdf
{"detail":"Server is under heavy load. Please try again later."}
```
1 parent 1b2a3ff commit 2c81930

File tree

10 files changed

+95
-11
lines changed

10 files changed

+95
-11
lines changed

Diff for: CHANGELOG.md

+4
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
## 0.0.40-dev0
2+
3+
* Reject traffic when we're overloaded via `UNSTRUCTURED_MEMORY_FREE_MINIMUM_MB`
4+
15
## 0.0.39
26

37
* Fix wrong handling of param skip_infer_table_types

Diff for: pipeline-notebooks/pipeline-general.ipynb

+17
Original file line numberDiff line numberDiff line change
@@ -553,6 +553,7 @@
553553
"from pypdf import PdfReader, PdfWriter\n",
554554
"from unstructured.partition.auto import partition\n",
555555
"from unstructured.staging.base import convert_to_isd, convert_to_dataframe, elements_from_json\n",
556+
"import psutil\n",
556557
"import requests\n",
557558
"import time"
558559
]
@@ -767,6 +768,22 @@
767768
" \"m_xml_keep_tags\": m_xml_keep_tags,\n",
768769
" }, default=str)\n",
769770
" ))\n",
771+
"\n",
772+
" # If this var is set, reject traffic when free memory is below minimum\n",
773+
" # Allow internal requests - these are parallel calls already in progress\n",
774+
" mem = psutil.virtual_memory()\n",
775+
" memory_free_minimum = int(os.environ.get(\"UNSTRUCTURED_MEMORY_FREE_MINIMUM_MB\", 0))\n",
776+
"\n",
777+
" if memory_free_minimum > 0 and mem.available <= memory_free_minimum * 1024 * 1024:\n",
778+
" # Note(yuming): Use X-Forwarded-For header to find the original IP for external API\n",
779+
" # requests, since LB forwards requests in AWS\n",
780+
" origin_ip = request.headers.get(\"X-Forwarded-For\") or request.client.host\n",
781+
"\n",
782+
" if not origin_ip.startswith(\"10.\"):\n",
783+
" raise HTTPException(\n",
784+
" status_code=503, detail=\"Server is under heavy load. Please try again later.\"\n",
785+
" )\n",
786+
"\n",
770787
" if filename.endswith(\".msg\"):\n",
771788
" # Note(yuming): convert file type for msg files\n",
772789
" # since fast api might send the wrong one.\n",

Diff for: prepline_general/api/general.py

+18-1
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
from pypdf import PdfReader, PdfWriter
2525
from unstructured.partition.auto import partition
2626
from unstructured.staging.base import convert_to_isd, convert_to_dataframe, elements_from_json
27+
import psutil
2728
import requests
2829
import time
2930
from unstructured_inference.models.chipper import MODEL_TYPES as CHIPPER_MODEL_TYPES
@@ -233,6 +234,22 @@ def pipeline_api(
233234
)
234235
)
235236
)
237+
238+
# If this var is set, reject traffic when free memory is below minimum
239+
# Allow internal requests - these are parallel calls already in progress
240+
mem = psutil.virtual_memory()
241+
memory_free_minimum = int(os.environ.get("UNSTRUCTURED_MEMORY_FREE_MINIMUM_MB", 0))
242+
243+
if memory_free_minimum > 0 and mem.available <= memory_free_minimum * 1024 * 1024:
244+
# Note(yuming): Use X-Forwarded-For header to find the original IP for external API
245+
# requests, since LB forwards requests in AWS
246+
origin_ip = request.headers.get("X-Forwarded-For") or request.client.host
247+
248+
if not origin_ip.startswith("10."):
249+
raise HTTPException(
250+
status_code=503, detail="Server is under heavy load. Please try again later."
251+
)
252+
236253
if filename.endswith(".msg"):
237254
# Note(yuming): convert file type for msg files
238255
# since fast api might send the wrong one.
@@ -485,7 +502,7 @@ def return_content_type(filename):
485502

486503

487504
@router.post("/general/v0/general")
488-
@router.post("/general/v0.0.39/general")
505+
@router.post("/general/v0.0.40/general")
489506
def pipeline_1(
490507
request: Request,
491508
gz_uncompressed_content_type: Optional[str] = Form(default=None),

Diff for: preprocessing-pipeline-family.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
name: general
2-
version: 0.0.39
2+
version: 0.0.40

Diff for: requirements/base.in

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
-c constraints.in
12
unstructured[local-inference]>=0.8.1
23
unstructured-api-tools>=0.10.10
34
pydantic<2.0.2
@@ -9,6 +10,5 @@ ratelimit
910
requests
1011
pypdf
1112
pycryptodome
12-
# Note(austin) higher versions need rust installed? pinning this until we address in the dockerfile
13-
safetensors<0.3.3
13+
psutil
1414

Diff for: requirements/base.txt

+5-3
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@ jupyter-core==5.3.1
123123
# nbformat
124124
jupyterlab-pygments==0.2.2
125125
# via nbconvert
126-
kiwisolver==1.4.4
126+
kiwisolver==1.4.5
127127
# via matplotlib
128128
layoutparser[layoutmodels,tesseract]==0.3.4
129129
# via unstructured-inference
@@ -230,6 +230,8 @@ portalocker==2.7.0
230230
# via iopath
231231
protobuf==4.24.1
232232
# via onnxruntime
233+
psutil==5.9.5
234+
# via -r requirements/base.in
233235
pycocotools==2.0.7
234236
# via effdet
235237
pycparser==2.21
@@ -306,7 +308,7 @@ rpds-py==0.9.2
306308
# referencing
307309
safetensors==0.3.2
308310
# via
309-
# -r requirements/base.in
311+
# -c requirements/constraints.in
310312
# timm
311313
# transformers
312314
scipy==1.10.1
@@ -397,7 +399,7 @@ uvicorn[standard]==0.23.2
397399
# via unstructured-api-tools
398400
uvloop==0.17.0
399401
# via uvicorn
400-
watchfiles==0.19.0
402+
watchfiles==0.20.0
401403
# via uvicorn
402404
webencodings==0.5.1
403405
# via

Diff for: requirements/constraints.in

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
####################################################################################################
2+
# This file can house global constraints that aren't *direct* requirements of the package or any
3+
# extras. Putting a dependency here will only affect dependency sets that contain them -- in other
4+
# words, if something does not require a constraint, it will not be installed.
5+
####################################################################################################
6+
# Note(austin) - preventing a build error with 0.3.3
7+
safetensors<0.3.3

Diff for: requirements/test.in

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
-c constraints.in
12
black
23
# NOTE(mrobinson) - Pinning click due to a unicode issue in black
34
# can remove after black drops support for Python 3.6
@@ -9,4 +10,4 @@ pytest-cov
910
pytest-mock
1011
nbdev
1112
jupyter
12-
httpx
13+
httpx

Diff for: requirements/test.txt

+6-3
Original file line numberDiff line numberDiff line change
@@ -330,7 +330,7 @@ jupyterlab-server==2.24.0
330330
# notebook
331331
jupyterlab-widgets==3.0.8
332332
# via ipywidgets
333-
kiwisolver==1.4.4
333+
kiwisolver==1.4.5
334334
# via
335335
# -r requirements/base.txt
336336
# matplotlib
@@ -544,7 +544,9 @@ protobuf==4.24.1
544544
# -r requirements/base.txt
545545
# onnxruntime
546546
psutil==5.9.5
547-
# via ipykernel
547+
# via
548+
# -r requirements/base.txt
549+
# ipykernel
548550
ptyprocess==0.7.0
549551
# via
550552
# pexpect
@@ -698,6 +700,7 @@ rpds-py==0.9.2
698700
# referencing
699701
safetensors==0.3.2
700702
# via
703+
# -c requirements/constraints.in
701704
# -r requirements/base.txt
702705
# timm
703706
# transformers
@@ -872,7 +875,7 @@ uvloop==0.17.0
872875
# uvicorn
873876
watchdog==3.0.0
874877
# via nbdev
875-
watchfiles==0.19.0
878+
watchfiles==0.20.0
876879
# via
877880
# -r requirements/base.txt
878881
# uvicorn

Diff for: test_general/api/test_app.py

+33
Original file line numberDiff line numberDiff line change
@@ -387,6 +387,39 @@ def test_general_api_returns_400_bad_pdf():
387387
tmp.close()
388388

389389

390+
def test_general_api_returns_503(monkeypatch, mocker):
391+
"""
392+
When available memory is below the minimum, return a 503, unless our origin IP is 10.{4,5}.x.x
393+
"""
394+
monkeypatch.setenv("UNSTRUCTURED_MEMORY_FREE_MINIMUM_MB", "30000")
395+
396+
client = TestClient(app)
397+
test_file = Path("sample-docs") / "fake-xml.xml"
398+
response = client.post(
399+
MAIN_API_ROUTE,
400+
files=[("files", (str(test_file), open(test_file, "rb")))],
401+
)
402+
403+
assert response.status_code == 503
404+
405+
mock_client = mocker.patch("fastapi.Request.client")
406+
mock_client.host = "10.5.0.0"
407+
response = client.post(
408+
MAIN_API_ROUTE,
409+
files=[("files", (str(test_file), open(test_file, "rb")))],
410+
)
411+
412+
assert response.status_code == 200
413+
414+
mock_client.host = "10.4.0.0"
415+
response = client.post(
416+
MAIN_API_ROUTE,
417+
files=[("files", (str(test_file), open(test_file, "rb")))],
418+
)
419+
420+
assert response.status_code == 200
421+
422+
390423
class MockResponse:
391424
def __init__(self, status_code):
392425
self.status_code = status_code

0 commit comments

Comments
 (0)