Skip to content

Commit 0d160e5

Browse files
authored
Merge pull request #44 from NVIDIA-AI-Blueprints/antoniomtz/shared-parse-llm-json-utility
refactor: extract shared parse_llm_json utility and add .env.example
2 parents d2a3667 + 82eb203 commit 0d160e5

9 files changed

Lines changed: 478 additions & 265 deletions

File tree

.env.example

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# NVIDIA NGC API key (https://build.nvidia.com/)
2+
NGC_API_KEY=
3+
4+
# HuggingFace token for FLUX model (https://huggingface.co/settings/tokens)
5+
HF_TOKEN=

README.md

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -37,10 +37,8 @@ A GenAI-powered catalog enrichment system that transforms basic product images i
3737
## Tech Stack
3838

3939
**Backend:**
40-
- FastAPI + Uvicorn (ASGI server)
40+
- FastAPI + Uvicorn
4141
- Python 3.11+
42-
- OpenAI client (NVIDIA endpoint)
43-
- PIL (Pillow) for image processing
4442

4543
**Frontend:**
4644
- Next.js 15 with React 19
@@ -90,11 +88,10 @@ For self-hosting the NIM microservices locally, the following GPU requirements a
9088

9189
### Environment Setup
9290

93-
Create a `.env` file in the project root:
91+
Copy the example env file and fill in your keys:
9492

9593
```bash
96-
NGC_API_KEY=your_nvidia_api_key_here
97-
HF_TOKEN=your_huggingface_token_here
94+
cp .env.example .env
9895
```
9996

10097
**Getting API Keys:**

src/backend/image.py

Lines changed: 6 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
from dotenv import load_dotenv
3737
from openai import OpenAI
3838
from backend.config import get_config
39+
from backend.utils import parse_llm_json
3940
from backend.reflection import evaluate_image_quality
4041

4142
load_dotenv()
@@ -130,33 +131,11 @@ def _call_planner_llm(title: str, description: str, categories: List[str], local
130131
text = "".join(chunk.choices[0].delta.content for chunk in completion if chunk.choices[0].delta and chunk.choices[0].delta.content).strip()
131132
logger.info("Planner LLM response received: %s", text)
132133

133-
json_text = text
134-
if "```json" in text:
135-
try:
136-
start = text.find("```json") + len("```json")
137-
end = text.find("```", start)
138-
if end > start:
139-
json_text = text[start:end].strip()
140-
logger.info("Extracted JSON from markdown: %s", json_text)
141-
except Exception as e:
142-
logger.warning("Failed to extract JSON from markdown: %s", e)
143-
elif "```" in text:
144-
try:
145-
start = text.find("```") + len("```")
146-
end = text.find("```", start)
147-
if end > start:
148-
json_text = text[start:end].strip()
149-
logger.info("Extracted JSON from generic code block: %s", json_text)
150-
except Exception as e:
151-
logger.warning("Failed to extract JSON from code block: %s", e)
152-
153-
try:
154-
plan = json.loads(json_text)
155-
if isinstance(plan, dict):
156-
logger.info("Successfully parsed planner JSON with keys: %s", list(plan.keys()))
157-
return plan
158-
except Exception as e:
159-
logger.warning("Planner LLM returned non-JSON; using fallback plan. Parse error: %s", e)
134+
parsed = parse_llm_json(text)
135+
if parsed is not None:
136+
logger.info("Successfully parsed planner JSON with keys: %s", list(parsed.keys()))
137+
return parsed
138+
logger.warning("Planner LLM returned non-JSON; using fallback plan.")
160139

161140
# Randomized fallback options for variety
162141
backgrounds = [

src/backend/reflection.py

Lines changed: 15 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515

1616
"""Image quality evaluation for generated variations using VLM."""
1717
import os
18-
import json
1918
import base64
2019
import logging
2120
from typing import Optional, Dict, Any
@@ -25,6 +24,7 @@
2524
from dotenv import load_dotenv
2625
from openai import OpenAI
2726
from backend.config import get_config
27+
from backend.utils import parse_llm_json
2828

2929
load_dotenv()
3030

@@ -140,25 +140,21 @@ def _encode_image_to_base64(image_bytes: bytes, target_format: str = "png") -> s
140140

141141
def _parse_quality_response(response_text: str) -> Optional[Dict[str, Any]]:
142142
"""Parse VLM quality response, handling JSON or markdown-wrapped JSON."""
143-
try:
144-
text = response_text.strip()
145-
146-
if "```" in text:
147-
start = text.find("```json") + 7 if "```json" in text else text.find("```") + 3
148-
end = text.find("```", start)
149-
text = text[start:end].strip() if end > start else text
150-
151-
data = json.loads(text)
152-
153-
if isinstance(data, dict) and "value" in data:
154-
score = max(0.0, min(100.0, float(data["value"])))
155-
issues = data.get("issues", []) if isinstance(data.get("issues"), list) else []
156-
return {"score": score, "issues": issues}
157-
143+
data = parse_llm_json(response_text)
144+
if data is None:
145+
logger.warning(f"Parse failed - Response: {response_text}")
146+
return None
147+
148+
if "value" not in data:
158149
logger.warning(f"Response missing 'value': {data}")
159150
return None
160-
161-
except (json.JSONDecodeError, ValueError, TypeError) as e:
162-
logger.warning(f"Parse failed: {e} - Response: {response_text}")
151+
152+
try:
153+
score = max(0.0, min(100.0, float(data["value"])))
154+
except (ValueError, TypeError) as e:
155+
logger.warning(f"Invalid 'value': {e} - Response: {response_text}")
163156
return None
164157

158+
issues = data.get("issues", []) if isinstance(data.get("issues"), list) else []
159+
return {"score": score, "issues": issues}
160+

src/backend/trellis.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,8 @@ async def generate_3d_asset(
6464
"ss_cfg_scale": ss_cfg_scale,
6565
"slat_sampling_steps": slat_sampling_steps,
6666
"ss_sampling_steps": ss_sampling_steps,
67-
"seed": seed
67+
"seed": seed,
68+
"disable_safety_checker": True
6869
}
6970

7071
headers = {"Accept": "application/octet-stream", "Content-Type": "application/json"}

src/backend/utils.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
# SPDX-License-Identifier: Apache-2.0
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
import json
17+
import re
18+
from typing import Optional
19+
20+
21+
def parse_llm_json(
    text: str,
    *,
    extract_braces: bool = False,
    strip_comments: bool = False,
) -> Optional[dict]:
    """Parse a JSON dict from an LLM response, handling common formatting issues.

    Handles markdown code fences (```json / ```), optional narrowing to the
    outermost ``{...}`` span, and optional stripping of ``//`` and ``/* */``
    comments that some models emit despite JSON forbidding them.

    Args:
        text: Raw LLM response text.
        extract_braces: If True, narrow to the outermost brace-delimited span
            before parsing (useful when the model wraps the JSON in prose).
        strip_comments: If True, remove ``//`` and ``/* */`` comments.
            NOTE(review): the regexes are not string-aware, so a string value
            containing ``//`` (e.g. a URL) may be mangled — acceptable here
            because callers fall back gracefully on a failed parse.

    Returns:
        The parsed dict, or None on any failure (including valid JSON that is
        not a dict, e.g. a top-level list).
    """
    text = text.strip()

    # Strip markdown fences; check "```json" before the generic "```" so the
    # language tag is consumed along with the opening fence.
    for marker in ("```json", "```"):
        if marker in text:
            start = text.find(marker) + len(marker)
            end = text.find("```", start)
            if end > start:
                text = text[start:end].strip()
            else:
                # Fix: no closing fence (truncated response). Previously the
                # opening fence was left in place, guaranteeing a parse
                # failure; take everything after it instead.
                text = text[start:].strip()
            break

    if extract_braces:
        first_brace = text.find("{")
        last_brace = text.rfind("}")
        if first_brace != -1 and last_brace != -1 and last_brace > first_brace:
            text = text[first_brace : last_brace + 1]

    if strip_comments:
        text = re.sub(r"//.*?(?=\n|$)", "", text)
        text = re.sub(r"/\*.*?\*/", "", text, flags=re.DOTALL)

    try:
        parsed = json.loads(text)
    except (json.JSONDecodeError, TypeError, ValueError):
        # Narrowed from bare Exception: these are the failures json.loads
        # actually raises for bad input.
        return None
    return parsed if isinstance(parsed, dict) else None

src/backend/vlm.py

Lines changed: 15 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,12 @@
1717
import json
1818
import base64
1919
import logging
20-
import re
2120
from typing import Optional, List, Dict, Any
2221

2322
from dotenv import load_dotenv
2423
from openai import OpenAI
2524
from backend.config import get_config
25+
from backend.utils import parse_llm_json
2626

2727
load_dotenv()
2828

@@ -117,34 +117,11 @@ def _call_nemotron_enhance_vlm(
117117
text = "".join(chunk.choices[0].delta.content for chunk in completion if chunk.choices[0].delta and chunk.choices[0].delta.content)
118118
logger.info("[Step 1] Nemotron response received: %d chars", len(text))
119119

120-
json_text = text.strip()
121-
for marker in ("```json", "```"):
122-
if marker in json_text:
123-
try:
124-
start = json_text.find(marker) + len(marker)
125-
end = json_text.find("```", start)
126-
if end > start:
127-
json_text = json_text[start:end].strip()
128-
break
129-
except Exception as e:
130-
logger.warning(f"[Step 1] Failed to extract JSON from {marker}: {e}")
131-
132-
first_brace = json_text.find('{')
133-
last_brace = json_text.rfind('}')
134-
if first_brace != -1 and last_brace != -1 and last_brace > first_brace:
135-
json_text = json_text[first_brace:last_brace+1]
136-
137-
json_text = re.sub(r'//.*?(?=\n|$)', '', json_text)
138-
json_text = re.sub(r'/\*.*?\*/', '', json_text, flags=re.DOTALL)
139-
140-
try:
141-
parsed = json.loads(json_text)
142-
if isinstance(parsed, dict):
143-
logger.info("[Step 1] Enhancement successful: enhanced_keys=%s", list(parsed.keys()))
144-
return parsed
145-
except Exception as e:
146-
logger.warning(f"[Step 1] JSON parse error: {e}, using VLM output")
147-
120+
parsed = parse_llm_json(text, extract_braces=True, strip_comments=True)
121+
if parsed is not None:
122+
logger.info("[Step 1] Enhancement successful: enhanced_keys=%s", list(parsed.keys()))
123+
return parsed
124+
logger.warning("[Step 1] JSON parse failed, using VLM output")
148125
return vlm_output
149126

150127

@@ -244,34 +221,11 @@ def _call_nemotron_apply_branding(
244221
text = "".join(chunk.choices[0].delta.content for chunk in completion if chunk.choices[0].delta and chunk.choices[0].delta.content)
245222
logger.info("[Step 2] Nemotron response received: %d chars", len(text))
246223

247-
json_text = text.strip()
248-
for marker in ("```json", "```"):
249-
if marker in json_text:
250-
try:
251-
start = json_text.find(marker) + len(marker)
252-
end = json_text.find("```", start)
253-
if end > start:
254-
json_text = json_text[start:end].strip()
255-
break
256-
except Exception as e:
257-
logger.warning(f"[Step 2] Failed to extract JSON from {marker}: {e}")
258-
259-
first_brace = json_text.find('{')
260-
last_brace = json_text.rfind('}')
261-
if first_brace != -1 and last_brace != -1 and last_brace > first_brace:
262-
json_text = json_text[first_brace:last_brace+1]
263-
264-
json_text = re.sub(r'//.*?(?=\n|$)', '', json_text)
265-
json_text = re.sub(r'/\*.*?\*/', '', json_text, flags=re.DOTALL)
266-
267-
try:
268-
parsed = json.loads(json_text)
269-
if isinstance(parsed, dict):
270-
logger.info("[Step 2] Brand alignment successful: keys=%s", list(parsed.keys()))
271-
return parsed
272-
except Exception as e:
273-
logger.warning(f"[Step 2] JSON parse error: {e}, returning Step 1 content unchanged")
274-
224+
parsed = parse_llm_json(text, extract_braces=True, strip_comments=True)
225+
if parsed is not None:
226+
logger.info("[Step 2] Brand alignment successful: keys=%s", list(parsed.keys()))
227+
return parsed
228+
logger.warning("[Step 2] JSON parse failed, returning Step 1 content unchanged")
275229
return enhanced_content
276230

277231

@@ -374,23 +328,10 @@ def _call_vlm(image_bytes: bytes, content_type: str) -> Dict[str, Any]:
374328
text = "".join(chunk.choices[0].delta.content for chunk in completion if chunk.choices[0].delta and chunk.choices[0].delta.content)
375329
logger.info("VLM response received: %d chars", len(text))
376330

377-
json_text = text.strip()
378-
for marker in ("```json", "```"):
379-
if marker in json_text:
380-
try:
381-
start = json_text.find(marker) + len(marker)
382-
end = json_text.find("```", start)
383-
if end > start:
384-
json_text = json_text[start:end].strip()
385-
break
386-
except Exception:
387-
pass
388-
389-
try:
390-
parsed = json.loads(json_text)
391-
return parsed if isinstance(parsed, dict) else {"title": "", "description": json_text, "categories": ["uncategorized"], "tags": [], "colors": []}
392-
except Exception:
393-
return {"title": "", "description": json_text, "categories": ["uncategorized"], "tags": [], "colors": []}
331+
parsed = parse_llm_json(text)
332+
if parsed is not None:
333+
return parsed
334+
return {"title": "", "description": text.strip(), "categories": ["uncategorized"], "tags": [], "colors": []}
394335

395336
def run_vlm_analysis(
396337
image_bytes: bytes,

0 commit comments

Comments
 (0)