-
Notifications
You must be signed in to change notification settings - Fork 63
Description
System Info / 系統信息
通过 API 调用 GLM-OCR 解析以下 PDF 时，内容识别不完整；相比之下，PaddleOCR 对同一文件的识别效果更好。
Who can help? / 谁可以帮助到您?
No response
Information / 问题信息
- The official example scripts / 官方的示例脚本
- My own modified scripts / 我自己修改的脚本和任务
Reproduction / 复现过程
import base64
import requests
from pathlib import Path
# API configuration.
# NOTE: "xxxx" is a placeholder; supply a real key before running and keep it
# out of version control.
API_KEY = "xxxx"
API_URL = "https://open.bigmodel.cn/api/paas/v4/layout_parsing"

# Path of the local PDF file to parse (raw string so the Windows backslashes
# are not treated as escape sequences).
pdf_path = Path(r"C:\work\code\ocr\data\nvidia.pdf")
def encode_file_to_base64(file_path: Path) -> str:
    """Return the file's contents as a base64 ``data:`` URI string.

    Args:
        file_path: Path of the file to read; it is opened in binary mode and
            read fully into memory.

    Returns:
        The base64-encoded contents prefixed with
        ``data:application/pdf;base64,``.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(file_path, "rb") as f:
        file_content = f.read()
    base64_str = base64.b64encode(file_content).decode("utf-8")
    # Prepend the data-URI prefix; the MIME type is fixed to PDF.
    return f"data:application/pdf;base64,{base64_str}"
def call_glm_ocr(file_base64: str) -> dict:
    """Send a file to the GLM-OCR layout-parsing endpoint and return its reply.

    Args:
        file_base64: The file payload as produced by
            ``encode_file_to_base64`` (a base64 ``data:`` URI string).

    Returns:
        The response body decoded from JSON.

    Raises:
        requests.RequestException: On connection failure or when the request
            exceeds the 120-second timeout.
        ValueError: If the response body is not valid JSON.
    """
    headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": "glm-ocr",
        "file": file_base64,
        "need_layout_visualization": False,
    }
    response = requests.post(API_URL, headers=headers, json=payload, timeout=120)
    # The HTTP status is deliberately not checked here: the caller inspects
    # the decoded JSON for an "error" key instead.
    return response.json()
# Script entry point: encode the configured PDF, send it to GLM-OCR, save the
# returned Markdown next to the PDF, and print token usage statistics.
if __name__ == "__main__":
    # Check that the input file exists before doing any work.
    if not pdf_path.exists():
        print(f"错误: 文件不存在 - {pdf_path}")
    else:
        print(f"正在解析文件: {pdf_path.name}")
        file_size_mb = pdf_path.stat().st_size / 1024 / 1024
        print(f"文件大小: {file_size_mb:.2f} MB")
        print("-" * 50)

        # Convert the PDF to a base64 data URI.
        print("正在编码文件...")
        file_base64 = encode_file_to_base64(pdf_path)

        # Call the API.
        print("正在调用 GLM-OCR API...")
        result = call_glm_ocr(file_base64)

        # Check whether the API reported an error.
        if "error" in result:
            print(f"API 错误: {result['error']}")
        else:
            # Save the result to a .md file alongside the source PDF.
            output_file = pdf_path.with_suffix(".md")
            if "md_results" in result:
                md_content = result["md_results"]
                with open(output_file, "w", encoding="utf-8") as f:
                    f.write(md_content)
                print(f"OCR 结果已保存到: {output_file}")
            else:
                # No Markdown in the response: dump it for inspection.
                print(result)

            # Print token usage statistics when the response includes them.
            if "usage" in result:
                usage = result["usage"]
                print("\n=== Token 使用统计 ===")
                print(f"输入 Token: {usage.get('prompt_tokens', 'N/A')}")
                print(f"输出 Token: {usage.get('completion_tokens', 'N/A')}")
                print(f"总 Token: {usage.get('total_tokens', 'N/A')}")
Expected behavior / 期待表现
期望 PDF 中的全部内容都能被完整识别。