Skip to content

Commit bb00d46

Browse files
authored
docs: update blog
1 parent 5be4ff0 commit bb00d46

1 file changed

Lines changed: 93 additions & 2 deletions

File tree

docs/blog/posts/support_pp_doc_layout.md

Lines changed: 93 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ PP-DocLayout系列模型在版面分析方面效果很好,目前已经作为Pa
3535
### 转换命令
3636
3737
```bash
38-
paddle2onnx --model_dir=models/PP-DocLayoutV2 --model_filename inference.json --params_filename inference.pdiparams --save_file=./models/PP-DocLayoutV2/inference_v2.onnx --enable_onnx_checker=True
38+
paddle2onnx --model_dir=models/PP-DocLayoutV2 --model_filename inference.json --params_filename inference.pdiparams --save_file=./models/PP-DocLayoutV2/inference.onnx --enable_onnx_checker=True
3939
```
4040

4141
### 比较结果
@@ -46,7 +46,7 @@ paddle2onnx --model_dir=models/PP-DocLayoutV2 --model_filename inference.json
4646

4747
但是从可视化示例图结果来看,两者并无明显区别。可能在某些图上会有较大区别。
4848

49-
```python linenums="1"
49+
```python linenums="1" title="比较两种格式模型推理结果"
5050

5151
# 省略前面代码... ...
5252

@@ -110,3 +110,94 @@ Max relative difference: 194.
110110
因为PaddleOCR库中需要兼容的推理代码较多,大而全,这也导致代码有些臃肿,这是难以避免的。但是如果只看PP-DocLayout的推理代码,很多问题就变得很简单了。
111111
112112
完整的推理代码,我放到了Gist上 → [link](https://gist.github.com/SWHL/c9455e8947f4abdfbbd8439c0bb83410)
113+
114+
### 字典写入 ONNX
115+
116+
```python linenums="1" title="write_dict.py"
117+
from pathlib import Path
118+
from typing import List, Union
119+
120+
import onnx
121+
import onnxruntime as ort
122+
from onnx import ModelProto
123+
124+
125+
class ONNXMetaOp:
    """Read, write and clear the custom metadata entries of an ONNX model.

    Every method is a classmethod and the class keeps no state. Methods that
    modify metadata return the in-memory ``ModelProto``; nothing is written to
    disk until ``save_model`` is called.
    """

    @classmethod
    def add_meta(
        cls,
        model_path: Union[str, Path],
        key: str,
        value: List[str],
        delimiter: str = "\n",
    ) -> ModelProto:
        """Load a model and store ``value`` under ``key`` in its metadata.

        Args:
            model_path: Path of the ONNX model to load.
            key: Metadata key to write.
            value: Strings to store; they are joined with ``delimiter`` into
                a single metadata value.
            delimiter: Separator placed between the items of ``value``.

        Returns:
            The loaded model with the metadata entry set. The file at
            ``model_path`` is not modified.
        """
        model = onnx.load_model(model_path)
        joined = delimiter.join(value)

        # ONNX expects metadata keys to be distinct, so overwrite an existing
        # entry instead of appending a duplicate one.
        for prop in model.metadata_props:
            if prop.key == key:
                prop.value = joined
                break
        else:
            meta = model.metadata_props.add()
            meta.key = key
            meta.value = joined
        return model

    @classmethod
    def get_meta(
        cls, model_path: Union[str, Path], key: str, split_sym: str = "\n"
    ) -> List[str]:
        """Read the metadata value stored under ``key`` and split it.

        Args:
            model_path: Path of the ONNX model to inspect.
            key: Metadata key to look up.
            split_sym: Separator used to split the stored value; it should
                match the ``delimiter`` used when the value was written.

        Returns:
            The stored value split on ``split_sym``.

        Raises:
            KeyError: If the model has no metadata entry named ``key``.
        """
        sess = ort.InferenceSession(model_path)
        meta_map = sess.get_modelmeta().custom_metadata_map
        key_content = meta_map.get(key)
        if key_content is None:
            # Fail with a clear message instead of an AttributeError raised
            # by calling ``.split`` on ``None``.
            raise KeyError(
                f"Metadata key {key!r} not found in model: {model_path}"
            )
        return key_content.split(split_sym)

    @classmethod
    def del_meta(cls, model_path: Union[str, Path]) -> ModelProto:
        """Load a model and remove all of its metadata entries.

        Args:
            model_path: Path of the ONNX model to load.

        Returns:
            The loaded model with an empty ``metadata_props``. The file at
            ``model_path`` is not modified.
        """
        model = onnx.load_model(model_path)
        del model.metadata_props[:]
        return model

    @classmethod
    def save_model(cls, save_path: Union[str, Path], model: ModelProto) -> None:
        """Write ``model`` to ``save_path``.

        Args:
            save_path: Destination path of the ONNX file.
            model: Model to serialize.
        """
        onnx.save_model(model, save_path)
159+
160+
161+
# Input model and the output model that will carry the label metadata.
# NOTE(review): the conversion command earlier in this post saves to
# ./models/PP-DocLayoutV2/inference.onnx - confirm this input path is intended.
model_path = "models/inference.onnx"
new_model_path = "models/pp_doc_layoutv2.onnx"

# Label names to embed in the model metadata.
# NOTE(review): presumably the order must match the model's class indices -
# confirm before editing this list.
paper_label = [
    "abstract",
    "algorithm",
    "aside_text",
    "chart",
    "content",
    "display_formula",
    "doc_title",
    "figure_title",
    "footer",
    "footer_image",
    "footnote",
    "formula_number",
    "header",
    "header_image",
    "image",
    "inline_formula",
    "number",
    "paragraph_title",
    "reference",
    "reference_content",
    "seal",
    "table",
    "text",
    "vertical_text",
    "vision_footnote",
]

# Attach the labels under the "character" key, then persist the result as a
# new file so the original model stays untouched.
model = ONNXMetaOp.add_meta(model_path, key="character", value=paper_label)
ONNXMetaOp.save_model(new_model_path, model)

# Read the labels back from the saved file to verify the round trip.
print(ONNXMetaOp.get_meta(new_model_path, key="character"))
196+
```
197+
198+
输出以下`label`,则认为成功:
199+
200+
```bash linenums="1"
201+
$ python write_dict.py
202+
['abstract', 'algorithm', 'aside_text', 'chart', 'content', 'display_formula', 'doc_title', 'figure_title', 'footer', 'footer_image', 'footnote', 'formula_number', 'header', 'header_image', 'image', 'inline_formula', 'number', 'paragraph_title', 'reference', 'reference_content', 'seal', 'table', 'text', 'vertical_text', 'vision_footnote']
203+
```

0 commit comments

Comments
 (0)