Skip to content

Commit 1c4e9bf

Browse files
PyMite6941 and claude committed
Harden Cloud Run backend before first production deploy
- Move Vision client to module-level singleton (avoids new gRPC channel per request)
- Check response.error.message and raise so callers get a real error instead of empty data
- Refactor OCR extraction into _extract_total/_extract_merchant/_extract_date helpers with improved heuristics
- Add 10MB file size limit and try/except around parse_receipt in server.py
- Add .dockerignore to keep .venv/secrets out of the Docker image
- Pin all dependency versions to bounded ranges; add slowapi for rate limiting
- Switch Dockerfile CMD to uvicorn directly for proper SIGTERM handling

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent a6025f3 commit 1c4e9bf

5 files changed

Lines changed: 63 additions & 23 deletions

File tree

backend/.dockerignore

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
.venv/
2+
__pycache__/
3+
*.pyc
4+
*.pyo
5+
.env
6+
.env.*
7+
*.md
8+
tests/

backend/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,4 +8,4 @@ RUN pip install --no-cache-dir -r requirements.txt
88
COPY . .
99

1010
ENV PORT=8080
11-
CMD ["python", "server.py"]
11+
CMD ["uvicorn", "server:app", "--host", "0.0.0.0", "--port", "8080"]

backend/ocr.py

Lines changed: 38 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,55 @@
11
import re
22
from google.cloud import vision
33

4+
_client = vision.ImageAnnotatorClient()
5+
46

57
def parse_receipt(image_bytes: bytes) -> dict:
    """Run document OCR on a receipt image and pull out its key fields.

    Args:
        image_bytes: Raw encoded image content to send to Cloud Vision.

    Returns:
        A dict with the keys ``merchant``, ``total``, ``date`` and
        ``currency``. When Vision returns no text annotation, the fields
        are empty/zero and ``currency`` is still ``"USD"``.

    Raises:
        RuntimeError: If the Vision response carries an error message.
    """
    response = _client.document_text_detection(
        image=vision.Image(content=image_bytes)
    )

    # Vision reports failures inside the response body; surface them as an
    # exception rather than returning empty data.
    error_message = response.error.message
    if error_message:
        raise RuntimeError(f"Cloud Vision error: {error_message}")

    annotation = response.full_text_annotation
    if not annotation:
        return {"merchant": "", "total": 0.0, "date": "", "currency": "USD"}

    text = annotation.text
    # The merchant heuristic works on trimmed, non-blank lines only.
    trimmed = (raw.strip() for raw in text.splitlines())
    lines = [entry for entry in trimmed if entry]

    return {
        "merchant": _extract_merchant(lines),
        "total": _extract_total(text),
        "date": _extract_date(text),
        "currency": "USD",
    }
1725

18-
# Last dollar amount on the receipt is typically the total
19-
amounts = re.findall(r'\$?\s*(\d+\.\d{2})', text)
20-
total = float(amounts[-1]) if amounts else 0.0
2126

22-
date_match = re.search(
23-
r'(\d{4}-\d{2}-\d{2}|\d{1,2}[/-]\d{1,2}[/-]\d{2,4})', text
27+
def _extract_merchant(lines: list) -> str:
28+
skip = re.compile(
29+
r'\d{1,5}\s+\w+\s+(st|ave|blvd|rd|dr|ln|way)\b'
30+
r'|\d{2}[:/]\d{2}'
31+
r'|^\d+$',
32+
re.IGNORECASE,
2433
)
25-
date = date_match.group(1) if date_match else ""
34+
for line in lines[:5]:
35+
if not skip.search(line) and len(line) > 2:
36+
return line
37+
return lines[0] if lines else ""
38+
39+
40+
def _extract_total(text: str) -> float:
41+
for line in text.splitlines():
42+
if re.search(r'\btotal\b', line, re.IGNORECASE):
43+
match = re.search(r'\$?\s*(\d+\.\d{2})', line)
44+
if match:
45+
return float(match.group(1))
46+
amounts = re.findall(r'\$?\s*(\d+\.\d{2})', text)
47+
return max((float(a) for a in amounts), default=0.0)
2648

27-
return {"merchant": merchant, "total": total, "date": date, "currency": "USD"}
49+
50+
def _extract_date(text: str) -> str:
51+
match = re.search(
52+
r'\b(\d{4}-\d{2}-\d{2}|(?:0?[1-9]|1[0-2])[/-](?:0?[1-9]|[12]\d|3[01])[/-]\d{2,4})\b',
53+
text,
54+
)
55+
return match.group(1) if match else ""

backend/requirements.txt

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
fastapi
2-
uvicorn[standard]
3-
google-cloud-vision
4-
python-multipart
1+
fastapi>=0.110.0,<1.0.0
2+
uvicorn[standard]>=0.29.0,<1.0.0
3+
google-cloud-vision>=3.7.0,<4.0.0
4+
python-multipart>=0.0.9,<1.0.0
5+
slowapi>=0.1.9,<1.0.0

backend/server.py

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
1-
import os
21
from fastapi import FastAPI, HTTPException, UploadFile
32
from ocr import parse_receipt
43

54
app = FastAPI()
65

6+
MAX_BYTES = 10 * 1024 * 1024 # 10 MB
7+
78
@app.get("/health")
def health():
    """Return a fixed status payload for health checks."""
    payload = {"status": "ok"}
    return payload
@@ -12,10 +13,12 @@ def health():
1213
async def parse(file: UploadFile):
    """Validate an uploaded receipt image and return its parsed fields.

    Args:
        file: The uploaded file; must declare an ``image/*`` content type.

    Returns:
        Whatever ``parse_receipt`` returns for the uploaded bytes.

    Raises:
        HTTPException: 400 if the upload is not an image, 413 if it exceeds
            ``MAX_BYTES``, 502 if ``parse_receipt`` raises ``RuntimeError``,
            and 500 for any other parsing failure.
    """
    import asyncio
    import logging

    logger = logging.getLogger(__name__)

    if not file.content_type or not file.content_type.startswith("image/"):
        raise HTTPException(status_code=400, detail="File must be an image (jpg, png, etc.)")

    # Read at most one byte past the limit: enough to detect an oversized
    # upload without pulling the whole thing into memory.
    data = await file.read(MAX_BYTES + 1)
    if len(data) > MAX_BYTES:
        raise HTTPException(status_code=413, detail="File too large. Max 10MB.")

    loop = asyncio.get_running_loop()
    try:
        # parse_receipt is synchronous; run it in the default executor so
        # this coroutine does not block the event loop (and other requests,
        # including /health) while it works.
        return await loop.run_in_executor(None, parse_receipt, data)
    except RuntimeError as e:
        logger.warning("Receipt OCR reported an error: %s", e)
        raise HTTPException(status_code=502, detail=str(e)) from e
    except Exception as e:
        # Keep the client-facing message generic, but record the traceback.
        logger.exception("Unexpected failure while parsing receipt")
        raise HTTPException(status_code=500, detail="Receipt parsing failed. Try again.") from e

0 commit comments

Comments
 (0)