|
13 | 13 |
|
14 | 14 | import json |
15 | 15 | import re |
| 16 | +from typing import Any, Optional |
16 | 17 |
|
17 | 18 | import markdown |
18 | 19 |
|
|
33 | 34 | from casts.blog_writer.modules.tools import fetch_content, generate_image |
34 | 35 |
|
35 | 36 |
|
| 37 | +def _extract_json(text: str) -> Optional[Any]: |
| 38 | + """Extract JSON from LLM response text. |
| 39 | +
|
| 40 | + Handles multiple formats: |
| 41 | + 1. ```json ... ``` code blocks |
| 42 | + 2. ``` ... ``` code blocks without language tag |
| 43 | + 3. Raw JSON object {...} |
| 44 | + 4. Raw JSON array [...] |
| 45 | + """ |
| 46 | + # Try code block with json tag |
| 47 | + json_match = re.search(r"```json\s*([\s\S]*?)```", text) |
| 48 | + if json_match: |
| 49 | + try: |
| 50 | + return json.loads(json_match.group(1).strip()) |
| 51 | + except json.JSONDecodeError: |
| 52 | + pass |
| 53 | + |
| 54 | + # Try code block without tag |
| 55 | + json_match = re.search(r"```\s*([\s\S]*?)```", text) |
| 56 | + if json_match: |
| 57 | + try: |
| 58 | + return json.loads(json_match.group(1).strip()) |
| 59 | + except json.JSONDecodeError: |
| 60 | + pass |
| 61 | + |
| 62 | + # Try to find raw JSON object |
| 63 | + json_match = re.search(r"\{[\s\S]*\}", text) |
| 64 | + if json_match: |
| 65 | + try: |
| 66 | + return json.loads(json_match.group(0)) |
| 67 | + except json.JSONDecodeError: |
| 68 | + pass |
| 69 | + |
| 70 | + # Try to find raw JSON array |
| 71 | + json_match = re.search(r"\[[\s\S]*\]", text) |
| 72 | + if json_match: |
| 73 | + try: |
| 74 | + return json.loads(json_match.group(0)) |
| 75 | + except json.JSONDecodeError: |
| 76 | + pass |
| 77 | + |
| 78 | + return None |
| 79 | + |
| 80 | + |
36 | 81 | class FetchContent(AsyncBaseNode): |
37 | 82 | """URL에서 웹 콘텐츠 수집 (BS4/Playwright).""" |
38 | 83 |
|
@@ -79,15 +124,8 @@ async def execute(self, state, config=None): |
79 | 124 | self.log("컨텐츠 분석 중...") |
80 | 125 | response = await llm.ainvoke(prompt) |
81 | 126 |
|
82 | | - try: |
83 | | - # Parse JSON from response |
84 | | - content = response.content |
85 | | - # Extract JSON from markdown code block if present |
86 | | - json_match = re.search(r"```(?:json)?\s*([\s\S]*?)```", content) |
87 | | - if json_match: |
88 | | - content = json_match.group(1) |
89 | | - analyzed_content = json.loads(content) |
90 | | - except json.JSONDecodeError: |
| 127 | + analyzed_content = _extract_json(response.content) |
| 128 | + if not analyzed_content: |
91 | 129 | # Fallback structure |
92 | 130 | analyzed_content = { |
93 | 131 | "title": "Untitled", |
@@ -134,14 +172,13 @@ async def execute(self, state, config=None): |
134 | 172 | self.log("키워드 추천 중...") |
135 | 173 | response = await llm.ainvoke(prompt) |
136 | 174 |
|
137 | | - try: |
138 | | - content = response.content |
139 | | - json_match = re.search(r"```(?:json)?\s*([\s\S]*?)```", content) |
140 | | - if json_match: |
141 | | - content = json_match.group(1) |
142 | | - data = json.loads(content) |
| 175 | + data = _extract_json(response.content) |
| 176 | + if data and isinstance(data, dict) and "keywords" in data: |
143 | 177 | suggested_keywords = data.get("keywords", [])[:30] |
144 | | - except json.JSONDecodeError: |
| 178 | + elif data and isinstance(data, list): |
| 179 | + # Handle case where LLM returns just an array |
| 180 | + suggested_keywords = data[:30] |
| 181 | + else: |
145 | 182 | # Fallback: extract any quoted words |
146 | 183 | suggested_keywords = re.findall(r'"([^"]+)"', response.content)[:30] |
147 | 184 |
|
@@ -226,13 +263,8 @@ async def execute(self, state, config=None): |
226 | 263 | self.log("SEO 최적화 중...") |
227 | 264 | response = await llm.ainvoke(prompt) |
228 | 265 |
|
229 | | - try: |
230 | | - content = response.content |
231 | | - json_match = re.search(r"```(?:json)?\s*([\s\S]*?)```", content) |
232 | | - if json_match: |
233 | | - content = json_match.group(1) |
234 | | - seo_meta = json.loads(content) |
235 | | - except json.JSONDecodeError: |
| 266 | + seo_meta = _extract_json(response.content) |
| 267 | + if not seo_meta: |
236 | 268 | seo_meta = { |
237 | 269 | "title": "Blog Post", |
238 | 270 | "description": blog_markdown[:160], |
|
0 commit comments