Skip to content

Commit 4f2f13f

Browse files
committed
feat(question-tools): add Zhipu GLM-OCR PDF parser and enhance question generation pipeline
- Add Zhipu GLM-OCR PDF parser implementation with layout analysis and image extraction
- Introduce configurable PDF parser selection (zhipu/mineru) in question config
- Increase max parallel questions from 1 to 50 for improved throughput
- Extend LLM request timeouts from 120s to 600s for longer-running operations
- Add comprehensive test suite for PDF parsing, OCR, extraction, and full pipeline flows
- Update exam mimic workflow to dynamically select parser based on configuration
- Enhance API responses with parser type information in status messages
- Export new Zhipu parser from question tools module
1 parent c23bc71 commit 4f2f13f

File tree

14 files changed

+1261
-12
lines changed

14 files changed

+1261
-12
lines changed

config/main.yaml

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,14 @@ logging:
4040
question:
4141
# Refactored: no iteration loops (max_rounds removed)
4242
rag_query_count: 3
43-
max_parallel_questions: 1
43+
max_parallel_questions: 50
4444
rag_mode: naive
45+
# PDF parsing configuration
46+
pdf_parser: zhipu # Options: mineru, zhipu
47+
zhipu_ocr:
48+
model: glm-ocr
49+
api_url: https://open.bigmodel.cn/api/paas/v4/layout_parsing
50+
timeout: 300
4551
agents:
4652
retrieve:
4753
top_k: 30

src/api/routers/question.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
config = load_config_with_main("question_config.yaml", project_root)
3131
log_dir = config.get("paths", {}).get("user_log_dir") or config.get("logging", {}).get("log_dir")
3232
logger = get_logger("QuestionAPI", log_dir=log_dir)
33+
QUESTION_CFG = config.get("question", {})
3334

3435
router = APIRouter()
3536

@@ -203,7 +204,10 @@ def close(self):
203204
{
204205
"type": "status",
205206
"stage": "parsing",
206-
"content": "Parsing PDF exam paper (MinerU)...",
207+
"content": (
208+
"Parsing PDF exam paper "
209+
f"({'Zhipu GLM-OCR' if QUESTION_CFG.get('pdf_parser', 'zhipu') == 'zhipu' else 'MinerU'})..."
210+
),
207211
}
208212
)
209213
logger.info(f"Saved and validated uploaded PDF to: {pdf_path}")
@@ -461,9 +465,7 @@ async def log_pusher():
461465
"Please create or select a knowledge base and add documents first."
462466
)
463467
try:
464-
await websocket.send_json(
465-
{"type": "error", "content": error_content}
466-
)
468+
await websocket.send_json({"type": "error", "content": error_content})
467469
except (RuntimeError, WebSocketDisconnect):
468470
pass
469471
logger.warning(f"Question generation failed: {error_content}")

src/services/llm/cloud_provider.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -231,7 +231,7 @@ async def _openai_complete(
231231
if "response_format" in kwargs:
232232
data["response_format"] = kwargs["response_format"]
233233

234-
timeout = aiohttp.ClientTimeout(total=120)
234+
timeout = aiohttp.ClientTimeout(total=600)
235235
async with aiohttp.ClientSession(timeout=timeout) as session:
236236
async with session.post(url, headers=headers, json=data) as resp:
237237
if resp.status == 200:
@@ -401,7 +401,7 @@ async def _anthropic_complete(
401401
"temperature": kwargs.get("temperature", 0.7),
402402
}
403403

404-
timeout = aiohttp.ClientTimeout(total=120)
404+
timeout = aiohttp.ClientTimeout(total=600)
405405
async with aiohttp.ClientSession(timeout=timeout) as session:
406406
async with session.post(url, headers=headers, json=data) as response:
407407
if response.status != 200:

src/tools/question/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,12 @@
66

77
from .exam_mimic import mimic_exam_questions
88
from .pdf_parser import parse_pdf_with_mineru
9+
from .pdf_parser_zhipu import parse_pdf_with_zhipu
910
from .question_extractor import extract_questions_from_paper
1011

1112
__all__ = [
1213
"parse_pdf_with_mineru",
14+
"parse_pdf_with_zhipu",
1315
"extract_questions_from_paper",
1416
"mimic_exam_questions",
1517
]

src/tools/question/exam_mimic.py

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
# Note: AgentCoordinator is imported inside functions to avoid circular import
3030
from src.services.llm.config import get_llm_config
3131
from src.tools.question.pdf_parser import parse_pdf_with_mineru
32+
from src.tools.question.pdf_parser_zhipu import parse_pdf_with_zhipu
3233
from src.tools.question.question_extractor import extract_questions_from_paper
3334

3435
# Type alias for WebSocket callback
@@ -201,12 +202,20 @@ async def send_progress(event_type: str, data: dict[str, Any]):
201202
# If a PDF is provided, parse it first
202203
elif pdf_path:
203204
# Stage 1: Parsing PDF
205+
# Load config to determine which parser to use
206+
from src.services.config import load_config_with_main
207+
config = load_config_with_main("question_config.yaml", project_root)
208+
question_cfg = config.get("question", {})
209+
pdf_parser = question_cfg.get("pdf_parser", "zhipu") # Default to zhipu
210+
211+
parser_name = "Zhipu GLM-OCR" if pdf_parser == "zhipu" else "MinerU"
212+
204213
await send_progress(
205214
"progress",
206-
{"stage": "parsing", "status": "running", "message": "Parsing PDF with MinerU..."},
215+
{"stage": "parsing", "status": "running", "message": f"Parsing PDF with {parser_name}..."},
207216
)
208217

209-
print("🔄 Step 1: parse the PDF exam")
218+
print(f"🔄 Step 1: parse the PDF exam (using {parser_name})")
210219
print("-" * 80)
211220

212221
# Use provided output_dir or default to mimic_papers
@@ -216,11 +225,15 @@ async def send_progress(event_type: str, data: dict[str, Any]):
216225
output_base = project_root / "data" / "user" / "question" / "mimic_papers"
217226
output_base.mkdir(parents=True, exist_ok=True)
218227

219-
success = parse_pdf_with_mineru(pdf_path=pdf_path, output_base_dir=str(output_base))
228+
# Choose parser based on config
229+
if pdf_parser == "zhipu":
230+
success = parse_pdf_with_zhipu(pdf_path=pdf_path, output_base_dir=str(output_base))
231+
else:
232+
success = parse_pdf_with_mineru(pdf_path=pdf_path, output_base_dir=str(output_base))
220233

221234
if not success:
222-
await send_progress("error", {"content": "Failed to parse PDF with MinerU"})
223-
return {"success": False, "error": "Failed to parse PDF"}
235+
await send_progress("error", {"content": f"Failed to parse PDF with {parser_name}"})
236+
return {"success": False, "error": f"Failed to parse PDF with {parser_name}"}
224237

225238
print()
226239

0 commit comments

Comments
 (0)