-
Notifications
You must be signed in to change notification settings - Fork 158
/
Copy pathapp.py
110 lines (86 loc) · 3.32 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
from fastapi import FastAPI, Request, HTTPException
from fastapi.responses import JSONResponse
from fastapi.datastructures import FormData
import logging
import os
from prepline_general.api.endpoints import router as general_router
from prepline_general.api.openapi import set_custom_openapi
# Module-level logger; used by the HTTP error handler and the startup message below.
logger = logging.getLogger("unstructured_api")

# The application object. The interactive docs and the OpenAPI schema are both
# served under /general, and each server entry carries an
# "x-speakeasy-server-id" extension key.
app = FastAPI(
    title="Unstructured Pipeline API",
    summary="Partition documents with the Unstructured library",
    version="0.0.68",
    docs_url="/general/docs",
    openapi_url="/general/openapi.json",
    servers=[
        {
            "url": server_url,
            "description": server_description,
            "x-speakeasy-server-id": server_id,
        }
        for server_url, server_description, server_id in (
            ("https://api.unstructured.io", "Hosted API", "prod"),
            ("http://localhost:8000", "Development server", "local"),
        )
    ],
    openapi_tags=[{"name": "general"}],
)

# Mount the routes imported from prepline_general.api.endpoints.
app.include_router(general_router)
# Note(austin) - The "uvicorn.error" logger just dumps exceptions.
# We'd rather handle those in the exception handlers below, so switch it off
# when ENV marks this process as a deployment ("dev" or "prod").
uvicorn_logger = logging.getLogger("uvicorn.error")
if os.environ.get("ENV") in ("dev", "prod"):
    uvicorn_logger.disabled = True
# Catch all HTTPException for uniform logging and response
@app.exception_handler(HTTPException)
async def http_error_handler(request: Request, e: HTTPException) -> JSONResponse:
    """
    Log an HTTPException and return it to the client as a JSON body.

    The response keeps the exception's status code and puts its detail under the
    "detail" key. Any headers attached to the exception (for example
    WWW-Authenticate on a 401) are forwarded instead of being dropped.
    """
    logger.error(e.detail)
    return JSONResponse(
        status_code=e.status_code,
        content={"detail": e.detail},
        # getattr keeps this safe if the exception carries no headers attribute
        headers=getattr(e, "headers", None),
    )
# Catch any other errors and return as 500
@app.exception_handler(Exception)
async def error_handler(request: Request, e: Exception) -> JSONResponse:
    """
    Log an unexpected exception and report it to the client as a 500.

    The "uvicorn.error" logger is disabled for dev/prod earlier in this module,
    so without logging here an unhandled error may go unrecorded.
    """
    # exc_info attaches the traceback of the exception to the log record
    logger.error("Unhandled exception: %s", e, exc_info=e)
    # NOTE(review): str(e) exposes internal error text to clients. It is kept
    # unchanged here because existing consumers may rely on the "detail" field.
    return JSONResponse(status_code=500, content={"detail": str(e)})
# CORS is enabled only when ALLOWED_ORIGINS is set; the value is a
# comma-separated list of origins.
allowed_origins = os.environ.get("ALLOWED_ORIGINS", None)
if allowed_origins:
    from fastapi.middleware.cors import CORSMiddleware

    app.add_middleware(
        CORSMiddleware,
        # Strip whitespace and drop empty entries, so a value such as
        # "https://a.com, https://b.com," yields two clean origins rather than
        # " https://b.com" and "", which would never match an Origin header.
        allow_origins=[
            origin.strip() for origin in allowed_origins.split(",") if origin.strip()
        ],
        allow_methods=["OPTIONS", "POST"],
        allow_headers=["Content-Type"],
    )

# Apply the OpenAPI customizations imported from prepline_general.api.openapi
set_custom_openapi(app)
# Note(austin) - When FastAPI parses our FormData params,
# it builds lists out of duplicate keys, like so:
# FormData([('key', 'value1'), ('key', 'value2')])
#
# The Speakeasy clients send a more explicit form:
# FormData([('key[]', 'value1'), ('key[]', 'value2')])
#
# FastAPI doesn't understand these, so we need to transform them.
# We can't do this in middleware before the data stream is read, nor in the endpoint
# after the fields are parsed. Thus, we patch Request._get_form, the private helper
# behind Request.form(), when this module is imported.
# Keep a handle on the original implementation so the wrapper can delegate to it
get_form = Request._get_form


async def patched_get_form(
    self,
    *,
    max_files: int | float = 1000,
    max_fields: int | float = 1000,
    **kwargs,
) -> FormData:
    """
    Call the original get_form, and iterate the results
    If a key has brackets at the end, remove them before returning the final FormData

    max_files and max_fields are forwarded to the original parser, so limits
    passed by the caller are enforced instead of being replaced by the defaults.
    Any additional keyword arguments are forwarded unchanged as well.
    """
    form_params = await get_form(
        self,
        max_files=max_files,
        max_fields=max_fields,
        # NOTE(review): presumably covers keyword-only limits added by newer
        # Starlette releases (e.g. max_part_size) - confirm against the pinned version
        **kwargs,
    )
    # Transform key[] into key. multi_items() yields every (key, value) pair,
    # so repeated keys are kept.
    fixed_params = [
        (key.removesuffix("[]") if key else key, value)
        for key, value in form_params.multi_items()
    ]
    return FormData(fixed_params)
# Install the wrapper in place of the Request class's private form parser.
# Going through setattr avoids the need for a type-checker suppression.
setattr(Request, "_get_form", patched_get_form)

logger.info("Started Unstructured API")