Skip to content

Commit 0d160e5

Browse files
authored
Merge pull request #44 from NVIDIA-AI-Blueprints/antoniomtz/shared-parse-llm-json-utility
refactor: extract shared parse_llm_json utility and add .env.example
2 parents d2a3667 + 82eb203 commit 0d160e5

9 files changed

Lines changed: 478 additions & 265 deletions

File tree

.env.example

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# NVIDIA NGC API key (https://build.nvidia.com/)
2+
NGC_API_KEY=
3+
4+
# HuggingFace token for FLUX model (https://huggingface.co/settings/tokens)
5+
HF_TOKEN=

README.md

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -37,10 +37,8 @@ A GenAI-powered catalog enrichment system that transforms basic product images i
3737
## Tech Stack
3838

3939
**Backend:**
40-
- FastAPI + Uvicorn (ASGI server)
40+
- FastAPI + Uvicorn
4141
- Python 3.11+
42-
- OpenAI client (NVIDIA endpoint)
43-
- PIL (Pillow) for image processing
4442

4543
**Frontend:**
4644
- Next.js 15 with React 19
@@ -90,11 +88,10 @@ For self-hosting the NIM microservices locally, the following GPU requirements a
9088

9189
### Environment Setup
9290

93-
Create a `.env` file in the project root:
91+
Copy the example env file and fill in your keys:
9492

9593
```bash
96-
NGC_API_KEY=your_nvidia_api_key_here
97-
HF_TOKEN=your_huggingface_token_here
94+
cp .env.example .env
9895
```
9996

10097
**Getting API Keys:**

src/backend/image.py

Lines changed: 6 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
from dotenv import load_dotenv
3737
from openai import OpenAI
3838
from backend.config import get_config
39+
from backend.utils import parse_llm_json
3940
from backend.reflection import evaluate_image_quality
4041

4142
load_dotenv()
@@ -130,33 +131,11 @@ def _call_planner_llm(title: str, description: str, categories: List[str], local
130131
text = "".join(chunk.choices[0].delta.content for chunk in completion if chunk.choices[0].delta and chunk.choices[0].delta.content).strip()
131132
logger.info("Planner LLM response received: %s", text)
132133

133-
json_text = text
134-
if "```json" in text:
135-
try:
136-
start = text.find("```json") + len("```json")
137-
end = text.find("```", start)
138-
if end > start:
139-
json_text = text[start:end].strip()
140-
logger.info("Extracted JSON from markdown: %s", json_text)
141-
except Exception as e:
142-
logger.warning("Failed to extract JSON from markdown: %s", e)
143-
elif "```" in text:
144-
try:
145-
start = text.find("```") + len("```")
146-
end = text.find("```", start)
147-
if end > start:
148-
json_text = text[start:end].strip()
149-
logger.info("Extracted JSON from generic code block: %s", json_text)
150-
except Exception as e:
151-
logger.warning("Failed to extract JSON from code block: %s", e)
152-
153-
try:
154-
plan = json.loads(json_text)
155-
if isinstance(plan, dict):
156-
logger.info("Successfully parsed planner JSON with keys: %s", list(plan.keys()))
157-
return plan
158-
except Exception as e:
159-
logger.warning("Planner LLM returned non-JSON; using fallback plan. Parse error: %s", e)
134+
parsed = parse_llm_json(text)
135+
if parsed is not None:
136+
logger.info("Successfully parsed planner JSON with keys: %s", list(parsed.keys()))
137+
return parsed
138+
logger.warning("Planner LLM returned non-JSON; using fallback plan.")
160139

161140
# Randomized fallback options for variety
162141
backgrounds = [

src/backend/reflection.py

Lines changed: 15 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515

1616
"""Image quality evaluation for generated variations using VLM."""
1717
import os
18-
import json
1918
import base64
2019
import logging
2120
from typing import Optional, Dict, Any
@@ -25,6 +24,7 @@
2524
from dotenv import load_dotenv
2625
from openai import OpenAI
2726
from backend.config import get_config
27+
from backend.utils import parse_llm_json
2828

2929
load_dotenv()
3030

@@ -140,25 +140,21 @@ def _encode_image_to_base64(image_bytes: bytes, target_format: str = "png") -> s
140140

141141
def _parse_quality_response(response_text: str) -> Optional[Dict[str, Any]]:
142142
"""Parse VLM quality response, handling JSON or markdown-wrapped JSON."""
143-
try:
144-
text = response_text.strip()
145-
146-
if "```" in text:
147-
start = text.find("```json") + 7 if "```json" in text else text.find("```") + 3
148-
end = text.find("```", start)
149-
text = text[start:end].strip() if end > start else text
150-
151-
data = json.loads(text)
152-
153-
if isinstance(data, dict) and "value" in data:
154-
score = max(0.0, min(100.0, float(data["value"])))
155-
issues = data.get("issues", []) if isinstance(data.get("issues"), list) else []
156-
return {"score": score, "issues": issues}
157-
143+
data = parse_llm_json(response_text)
144+
if data is None:
145+
logger.warning(f"Parse failed - Response: {response_text}")
146+
return None
147+
148+
if "value" not in data:
158149
logger.warning(f"Response missing 'value': {data}")
159150
return None
160-
161-
except (json.JSONDecodeError, ValueError, TypeError) as e:
162-
logger.warning(f"Parse failed: {e} - Response: {response_text}")
151+
152+
try:
153+
score = max(0.0, min(100.0, float(data["value"])))
154+
except (ValueError, TypeError) as e:
155+
logger.warning(f"Invalid 'value': {e} - Response: {response_text}")
163156
return None
164157

158+
issues = data.get("issues", []) if isinstance(data.get("issues"), list) else []
159+
return {"score": score, "issues": issues}
160+

src/backend/trellis.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,8 @@ async def generate_3d_asset(
6464
"ss_cfg_scale": ss_cfg_scale,
6565
"slat_sampling_steps": slat_sampling_steps,
6666
"ss_sampling_steps": ss_sampling_steps,
67-
"seed": seed
67+
"seed": seed,
68+
"disable_safety_checker": True
6869
}
6970

7071
headers = {"Accept": "application/octet-stream", "Content-Type": "application/json"}

src/backend/utils.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
# SPDX-License-Identifier: Apache-2.0
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
import json
17+
import re
18+
from typing import Optional
19+
20+
21+
def parse_llm_json(
    text: str,
    *,
    extract_braces: bool = False,
    strip_comments: bool = False,
) -> Optional[dict]:
    """Parse a JSON dict from an LLM response, handling common formatting issues.

    Handles markdown code fences (```json / ```), optional narrowing to the
    outermost ``{...}`` span, and optional stripping of ``//`` and ``/* */``
    comments that some models emit despite JSON forbidding them.

    Args:
        text: Raw LLM response text.
        extract_braces: If True, narrow to the outermost brace-delimited span
            before parsing (useful when the model wraps the JSON in prose).
        strip_comments: If True, remove ``//`` and ``/* */`` comments.
            NOTE(review): the regexes are not string-aware, so a string value
            containing ``//`` (e.g. a URL) may be mangled — acceptable here
            because callers fall back gracefully on a failed parse.

    Returns:
        The parsed dict, or None on any failure (including valid JSON that is
        not a dict, e.g. a top-level list).
    """
    text = text.strip()

    # Strip markdown fences; check "```json" before the generic "```" so the
    # language tag is consumed along with the opening fence.
    for marker in ("```json", "```"):
        if marker in text:
            start = text.find(marker) + len(marker)
            end = text.find("```", start)
            if end > start:
                text = text[start:end].strip()
            else:
                # Fix: no closing fence (truncated response). Previously the
                # opening fence was left in place, guaranteeing a parse
                # failure; take everything after it instead.
                text = text[start:].strip()
            break

    if extract_braces:
        first_brace = text.find("{")
        last_brace = text.rfind("}")
        if first_brace != -1 and last_brace != -1 and last_brace > first_brace:
            text = text[first_brace : last_brace + 1]

    if strip_comments:
        text = re.sub(r"//.*?(?=\n|$)", "", text)
        text = re.sub(r"/\*.*?\*/", "", text, flags=re.DOTALL)

    try:
        parsed = json.loads(text)
    except (json.JSONDecodeError, TypeError, ValueError):
        # Narrowed from bare Exception: these are the failures json.loads
        # actually raises for bad input.
        return None
    return parsed if isinstance(parsed, dict) else None

src/backend/vlm.py

Lines changed: 15 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,12 @@
1717
import json
1818
import base64
1919
import logging
20-
import re
2120
from typing import Optional, List, Dict, Any
2221

2322
from dotenv import load_dotenv
2423
from openai import OpenAI
2524
from backend.config import get_config
25+
from backend.utils import parse_llm_json
2626

2727
load_dotenv()
2828

@@ -117,34 +117,11 @@ def _call_nemotron_enhance_vlm(
117117
text = "".join(chunk.choices[0].delta.content for chunk in completion if chunk.choices[0].delta and chunk.choices[0].delta.content)
118118
logger.info("[Step 1] Nemotron response received: %d chars", len(text))
119119

120-
json_text = text.strip()
121-
for marker in ("```json", "```"):
122-
if marker in json_text:
123-
try:
124-
start = json_text.find(marker) + len(marker)
125-
end = json_text.find("```", start)
126-
if end > start:
127-
json_text = json_text[start:end].strip()
128-
break
129-
except Exception as e:
130-
logger.warning(f"[Step 1] Failed to extract JSON from {marker}: {e}")
131-
132-
first_brace = json_text.find('{')
133-
last_brace = json_text.rfind('}')
134-
if first_brace != -1 and last_brace != -1 and last_brace > first_brace:
135-
json_text = json_text[first_brace:last_brace+1]
136-
137-
json_text = re.sub(r'//.*?(?=\n|$)', '', json_text)
138-
json_text = re.sub(r'/\*.*?\*/', '', json_text, flags=re.DOTALL)
139-
140-
try:
141-
parsed = json.loads(json_text)
142-
if isinstance(parsed, dict):
143-
logger.info("[Step 1] Enhancement successful: enhanced_keys=%s", list(parsed.keys()))
144-
return parsed
145-
except Exception as e:
146-
logger.warning(f"[Step 1] JSON parse error: {e}, using VLM output")
147-
120+
parsed = parse_llm_json(text, extract_braces=True, strip_comments=True)
121+
if parsed is not None:
122+
logger.info("[Step 1] Enhancement successful: enhanced_keys=%s", list(parsed.keys()))
123+
return parsed
124+
logger.warning("[Step 1] JSON parse failed, using VLM output")
148125
return vlm_output
149126

150127

@@ -244,34 +221,11 @@ def _call_nemotron_apply_branding(
244221
text = "".join(chunk.choices[0].delta.content for chunk in completion if chunk.choices[0].delta and chunk.choices[0].delta.content)
245222
logger.info("[Step 2] Nemotron response received: %d chars", len(text))
246223

247-
json_text = text.strip()
248-
for marker in ("```json", "```"):
249-
if marker in json_text:
250-
try:
251-
start = json_text.find(marker) + len(marker)
252-
end = json_text.find("```", start)
253-
if end > start:
254-
json_text = json_text[start:end].strip()
255-
break
256-
except Exception as e:
257-
logger.warning(f"[Step 2] Failed to extract JSON from {marker}: {e}")
258-
259-
first_brace = json_text.find('{')
260-
last_brace = json_text.rfind('}')
261-
if first_brace != -1 and last_brace != -1 and last_brace > first_brace:
262-
json_text = json_text[first_brace:last_brace+1]
263-
264-
json_text = re.sub(r'//.*?(?=\n|$)', '', json_text)
265-
json_text = re.sub(r'/\*.*?\*/', '', json_text, flags=re.DOTALL)
266-
267-
try:
268-
parsed = json.loads(json_text)
269-
if isinstance(parsed, dict):
270-
logger.info("[Step 2] Brand alignment successful: keys=%s", list(parsed.keys()))
271-
return parsed
272-
except Exception as e:
273-
logger.warning(f"[Step 2] JSON parse error: {e}, returning Step 1 content unchanged")
274-
224+
parsed = parse_llm_json(text, extract_braces=True, strip_comments=True)
225+
if parsed is not None:
226+
logger.info("[Step 2] Brand alignment successful: keys=%s", list(parsed.keys()))
227+
return parsed
228+
logger.warning("[Step 2] JSON parse failed, returning Step 1 content unchanged")
275229
return enhanced_content
276230

277231

@@ -374,23 +328,10 @@ def _call_vlm(image_bytes: bytes, content_type: str) -> Dict[str, Any]:
374328
text = "".join(chunk.choices[0].delta.content for chunk in completion if chunk.choices[0].delta and chunk.choices[0].delta.content)
375329
logger.info("VLM response received: %d chars", len(text))
376330

377-
json_text = text.strip()
378-
for marker in ("```json", "```"):
379-
if marker in json_text:
380-
try:
381-
start = json_text.find(marker) + len(marker)
382-
end = json_text.find("```", start)
383-
if end > start:
384-
json_text = json_text[start:end].strip()
385-
break
386-
except Exception:
387-
pass
388-
389-
try:
390-
parsed = json.loads(json_text)
391-
return parsed if isinstance(parsed, dict) else {"title": "", "description": json_text, "categories": ["uncategorized"], "tags": [], "colors": []}
392-
except Exception:
393-
return {"title": "", "description": json_text, "categories": ["uncategorized"], "tags": [], "colors": []}
331+
parsed = parse_llm_json(text)
332+
if parsed is not None:
333+
return parsed
334+
return {"title": "", "description": text.strip(), "categories": ["uncategorized"], "tags": [], "colors": []}
394335

395336
def run_vlm_analysis(
396337
image_bytes: bytes,

0 commit comments

Comments
 (0)