Skip to content

Commit 830c94d

Browse files
committed
feat(scan): multi-garment detection — detect and save all items from one photo
1 parent 83bb2df commit 830c94d

3 files changed

Lines changed: 245 additions & 127 deletions

File tree

backend/app/api/routes.py

Lines changed: 56 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55
from typing import Optional
66
from app.services import image_ingestion, clothing_detector, llm_feedback, response_shaper
77
from app.services import tts_service
8-
from app.models.schemas import AnalyzeResponse, QuickScanResponse
8+
from app.models.schemas import AnalyzeResponse, QuickScanResponse, GarmentScanItem
99
from app.core.config import GEMINI_MODEL
1010
from app.errors.handlers import ImageQualityError
1111

@@ -64,25 +64,38 @@ async def analyze(
6464
QUICK_SCAN_SCHEMA = {
6565
"type": "object",
6666
"properties": {
67-
"suggested_name": {"type": "string"},
68-
"category": {"type": "string"},
69-
"color": {"type": "string"},
70-
"short_description": {"type": "string"},
71-
"long_description": {"type": "string"},
67+
"items": {
68+
"type": "array",
69+
"items": {
70+
"type": "object",
71+
"properties": {
72+
"suggested_name": {"type": "string"},
73+
"category": {"type": "string"},
74+
"color": {"type": "string"},
75+
"short_description": {"type": "string"},
76+
"long_description": {"type": "string"},
77+
},
78+
"required": ["suggested_name", "category", "color", "short_description", "long_description"],
79+
},
80+
}
7281
},
73-
"required": ["suggested_name", "category", "color", "short_description", "long_description"],
82+
"required": ["items"],
7483
}
7584

7685

77-
QUICK_SCAN_PROMPT = (
78-
"Identify the clothing item in this image. Return JSON with:\n"
79-
"- suggested_name: short item name (e.g. 'Navy Graphic Tee')\n"
80-
"- category: one of tops/bottoms/outerwear/shoes/accessories/other\n"
81-
"- color: primary color description\n"
82-
"- short_description: one sentence, max 15 words, describing what you see (fabric, color, print)\n"
83-
"- long_description: 3-4 sentences covering fabric feel, cut/fit, graphic/print details if any, "
84-
"and what occasions or styles it suits. Concrete and tactile — no vague words like 'nice' or 'great'."
85-
)
86+
def _build_quick_scan_prompt(categories: list[str]) -> str:
87+
cats = ", ".join(categories) if categories else "clothing"
88+
return (
89+
f"This image contains the following garment types detected by the clothing classifier: {cats}.\n"
90+
"For EACH detected garment type, return one entry in the items array with:\n"
91+
"- suggested_name: short descriptive name (e.g. 'Navy Graphic Tee')\n"
92+
"- category: one of tops/bottoms/footwear/outerwear/dress\n"
93+
"- color: primary color\n"
94+
"- short_description: one sentence, max 15 words, describing the specific garment\n"
95+
"- long_description: 3-4 sentences covering fabric feel, cut/fit, print details if any, "
96+
"and what occasions it suits. Concrete and tactile — no vague words like 'nice' or 'great'.\n"
97+
"Only describe garments visible in the image. Do not invent items."
98+
)
8699

87100

88101
@router.post("/quick-scan", response_model=QuickScanResponse)
@@ -93,20 +106,22 @@ async def quick_scan(image: UploadFile = File(...)):
93106
raw = await image.read()
94107

95108
image_rgb = image_ingestion.ingest(raw)
96-
await asyncio.to_thread(clothing_detector.detect, image_rgb)
109+
detection = await asyncio.to_thread(clothing_detector.detect, image_rgb)
97110

98111
img = Image.open(io.BytesIO(raw)).convert("RGB")
99112
buf = io.BytesIO()
100113
img.save(buf, format="JPEG", quality=85)
101114
jpeg = buf.getvalue()
102115

116+
prompt = _build_quick_scan_prompt(detection.categories)
117+
103118
gemini_unavailable = False
104119
try:
105120
response = _gemini().models.generate_content(
106121
model=GEMINI_MODEL,
107122
contents=[
108123
types.Part.from_bytes(data=jpeg, mime_type="image/jpeg"),
109-
QUICK_SCAN_PROMPT,
124+
prompt,
110125
],
111126
config=types.GenerateContentConfig(
112127
response_mime_type="application/json",
@@ -121,16 +136,33 @@ async def quick_scan(image: UploadFile = File(...)):
121136
gemini_unavailable = True
122137
data = groq_fallback.vision_json(
123138
jpeg,
124-
QUICK_SCAN_PROMPT,
139+
prompt,
125140
schema_hint=str(QUICK_SCAN_SCHEMA),
126141
)
127142

128-
data["description"] = data.get("short_description", "")
143+
raw_items = data.get("items", [])
144+
if not raw_items:
145+
# Fallback: wrap legacy single-item response
146+
raw_items = [data]
147+
129148
if gemini_unavailable:
130-
data["short_description"] = (
131-
groq_fallback.FALLBACK_NOTE + " " + data.get("short_description", "")
132-
).strip()
133-
return QuickScanResponse(**data)
149+
for item in raw_items:
150+
item["short_description"] = (
151+
groq_fallback.FALLBACK_NOTE + " " + item.get("short_description", "")
152+
).strip()
153+
154+
items = [GarmentScanItem(**{k: item.get(k, "") for k in GarmentScanItem.model_fields}) for item in raw_items]
155+
primary = items[0]
156+
157+
return QuickScanResponse(
158+
items=items,
159+
suggested_name=primary.suggested_name,
160+
category=primary.category,
161+
color=primary.color,
162+
short_description=primary.short_description,
163+
long_description=primary.long_description,
164+
description=primary.short_description,
165+
)
134166

135167

136168
# ---------------------------------------------------------------------------

backend/app/models/schemas.py

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,12 +42,22 @@ class AnalyzeResponse(BaseModel):
4242
latency_ms: int
4343

4444

45+
class GarmentScanItem(BaseModel):
46+
suggested_name: str
47+
category: str
48+
color: str
49+
short_description: str
50+
long_description: str
51+
52+
4553
class QuickScanResponse(BaseModel):
54+
items: list[GarmentScanItem] # all detected garments (multi-scan)
55+
# Primary item fields kept for backward compat (mirrors items[0])
4656
suggested_name: str
4757
category: str
48-
description: str # kept for backward compat
49-
short_description: str # 1 sentence for quick display/TTS
50-
long_description: str # 3-4 sentences stored in wardrobe
58+
description: str
59+
short_description: str
60+
long_description: str
5161
color: str
5262

5363

0 commit comments

Comments
 (0)