Skip to content

Commit cfe7794

Browse files
antoniomtz and Claude
committed
feat: generate product FAQs from Nemotron in parallel with enrichment
Add FAQ generation as a third parallel task alongside enrichment and policy retrieval, adding zero latency to the pipeline. The new _call_nemotron_generate_faqs() function produces 3-5 locale-aware Q&A pairs grounded in the VLM observation. Frontend uses Kaizen Tabs (Details / FAQs) and Accordion components for a collapsible FAQ display within the Fields card. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 2bd30ab commit cfe7794

File tree

8 files changed

+409
-132
lines changed

8 files changed

+409
-132
lines changed

src/backend/main.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828

2929
from backend.policy import evaluate_policy_compliance
3030
from backend.policy_library import PolicyLibrary
31-
from backend.vlm import extract_vlm_observation, build_enriched_vlm_result
31+
from backend.vlm import extract_vlm_observation, build_enriched_vlm_result, _call_nemotron_generate_faqs
3232
from backend.image import generate_image_variation
3333
from backend.trellis import generate_3d_asset
3434
from backend.config import get_config
@@ -195,7 +195,12 @@ async def vlm_analyze(
195195
"colors": vlm_observation.get("colors", []),
196196
},
197197
)
198-
result, policy_contexts = await asyncio.gather(enrichment_task, retrieval_task)
198+
faq_task = asyncio.to_thread(
199+
_call_nemotron_generate_faqs,
200+
vlm_observation,
201+
locale,
202+
)
203+
result, policy_contexts, faqs = await asyncio.gather(enrichment_task, retrieval_task, faq_task)
199204
if policy_contexts:
200205
logger.info("Policy retrieval returned %d candidate policy record(s); running compliance evaluation.", len(policy_contexts))
201206
product_snapshot = {
@@ -250,6 +255,8 @@ async def vlm_analyze(
250255
payload["enhanced_product"] = result["enhanced_product"]
251256
if result.get("policy_decision"):
252257
payload["policy_decision"] = result["policy_decision"]
258+
if faqs:
259+
payload["faqs"] = faqs
253260

254261
logger.info(f"/vlm/analyze success: title_len={len(payload['title'])} desc_len={len(payload['description'])} locale={locale}")
255262
return JSONResponse(payload)

src/backend/vlm.py

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -309,6 +309,91 @@ def _call_nemotron_apply_branding(
309309
return enhanced_content
310310

311311

312+
def _call_nemotron_generate_faqs(
    vlm_observation: Dict[str, Any],
    locale: str = "en-US"
) -> list:
    """Generate 3-5 product FAQs from a VLM observation using Nemotron.

    Runs in parallel with enrichment (via ``asyncio.to_thread``) so it adds
    zero latency to the analyze pipeline. Because the caller gathers this
    task without ``return_exceptions``, ANY transient failure here (network
    error, stream error, bad JSON) is swallowed and an empty list is
    returned so the request can proceed without FAQs.

    Args:
        vlm_observation: Structured visual-analysis dict produced by the VLM;
            serialized verbatim into the prompt.
        locale: Locale tag (e.g. "en-US") used to select language/region
            wording via LOCALE_CONFIG; unknown locales fall back to en-US.

    Returns:
        A list of ``{"question": ..., "answer": ...}`` dicts (3-5 expected),
        or an empty list on any LLM or parse failure.

    Raises:
        RuntimeError: If NGC_API_KEY is not set. This is a deployment
            misconfiguration, not a transient failure, so it is surfaced
            rather than silently returning no FAQs.
    """
    logger.info("[FAQ] Generating FAQs: vlm_keys=%s, locale=%s", list(vlm_observation.keys()), locale)

    if not (api_key := os.getenv("NGC_API_KEY")):
        raise RuntimeError(NGC_API_KEY_NOT_SET_ERROR)

    # Fall back to US English when the locale is not in the config table.
    info = LOCALE_CONFIG.get(locale, {"language": "English", "region": "United States", "country": "United States", "context": "American English"})
    llm_config = get_config().get_llm_config()
    client = OpenAI(base_url=llm_config['url'], api_key=api_key)

    observation_json = json.dumps(vlm_observation, indent=2, ensure_ascii=False)

    prompt = f"""/no_think You are a retail product FAQ specialist. Generate 3 to 5 frequently asked questions and answers for the product described below.

PRODUCT VISUAL ANALYSIS:
{observation_json}

TARGET LANGUAGE / REGION: {info['language']} ({info['region']})
{info['context']}

RULES:
- Generate between 3 and 5 FAQs.
- Each FAQ must have a "question" and an "answer" field.
- Questions should cover practical topics a shopper would ask: materials, care instructions, sizing, use cases, compatibility, durability.
- Answers must be helpful, concise (1-3 sentences), and factual.
- ONLY reference attributes visible in the product analysis above. Do NOT fabricate specifications (weight, wattage, capacity, dimensions) unless they appear in the analysis.
- Write questions and answers in {info['language']} appropriate for {info['region']}.

OUTPUT FORMAT:
Return ONLY a valid JSON array. No markdown, no commentary.
Example: [{{"question": "...", "answer": "..."}}, ...]"""

    logger.info("[FAQ] Sending prompt to Nemotron (length: %d chars)", len(prompt))

    # FAQs are best-effort: a transient LLM/network/stream failure must not
    # fail the whole /vlm/analyze request (the caller gathers this task
    # without return_exceptions), so catch everything past the key check.
    try:
        completion = client.chat.completions.create(
            model=llm_config['model'],
            messages=[{"role": "system", "content": "/no_think"}, {"role": "user", "content": prompt}],
            temperature=0.1, top_p=0.9, max_tokens=2048, stream=True,
            extra_body={"reasoning_budget": 16384, "chat_template_kwargs": {"enable_thinking": False}}
        )

        # Consume the stream; exceptions can also surface mid-iteration.
        text = "".join(
            chunk.choices[0].delta.content
            for chunk in completion
            if chunk.choices[0].delta and chunk.choices[0].delta.content
        )
    except Exception as exc:
        logger.warning("[FAQ] Nemotron call failed (%s), returning empty list", exc)
        return []
    logger.info("[FAQ] Nemotron response received: %d chars", len(text))

    # Parse JSON array (inline — parse_llm_json only handles dicts)
    try:
        cleaned = text.strip()
        # Strip a fenced code block if the model wrapped its output in one.
        for marker in ("```json", "```"):
            if marker in cleaned:
                start = cleaned.find(marker) + len(marker)
                end = cleaned.find("```", start)
                if end > start:
                    cleaned = cleaned[start:end].strip()
                break
        # Trim any leading/trailing commentary around the JSON array itself.
        first_bracket = cleaned.find("[")
        last_bracket = cleaned.rfind("]")
        if first_bracket != -1 and last_bracket > first_bracket:
            cleaned = cleaned[first_bracket : last_bracket + 1]
        parsed = json.loads(cleaned)
        # Accept only a list of dicts each carrying both required fields.
        if isinstance(parsed, list) and all(
            isinstance(f, dict) and "question" in f and "answer" in f
            for f in parsed
        ):
            logger.info("[FAQ] Generated %d FAQs", len(parsed))
            return parsed
        logger.warning("[FAQ] Parsed JSON has unexpected structure, returning empty list")
        return []
    except (json.JSONDecodeError, ValueError) as exc:
        logger.warning("[FAQ] JSON parse failed (%s), returning empty list", exc)
        return []
395+
396+
312397
def _call_nemotron_enhance(
313398
vlm_output: Dict[str, Any],
314399
product_data: Optional[Dict[str, Any]] = None,

src/ui/app/page.tsx

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -205,7 +205,8 @@ function Home() {
205205
colors: analyzeData.colors || [],
206206
tags: analyzeData.tags || [],
207207
categories: analyzeData.categories || [],
208-
policyDecision: analyzeData.policyDecision
208+
policyDecision: analyzeData.policyDecision,
209+
faqs: analyzeData.faqs || []
209210
});
210211
setIsAnalyzingFields(false);
211212

0 commit comments

Comments
 (0)