-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmake_markdown.py
52 lines (42 loc) · 1.96 KB
/
make_markdown.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import os
import re
def _natural_sort_key(filename):
    """Return a sort key that orders runs of ASCII digits by numeric value.

    Splitting on a capturing group yields text and digit runs in strict
    alternation, starting with a (possibly empty) text piece, so digit runs
    always sit at odd indices. Keys of different names therefore compare
    str-to-str and int-to-int at each position. The raw filename is appended
    as a tie-breaker (e.g. "block_01" vs "block_1") to keep the order
    deterministic.
    """
    pieces = re.split(r'([0-9]+)', filename)
    parts = [int(piece) if index % 2 else piece
             for index, piece in enumerate(pieces)]
    return parts, filename


def _list_numbered_files(folder, basename, extension):
    """List names in *folder* matching prefix/extension, in numeric order."""
    return sorted(
        (name for name in os.listdir(folder)
         if name.startswith(basename) and name.endswith(extension)),
        key=_natural_sort_key,
    )


def make_markdown(image_folder, text_folder, basename, md_filename):
    """
    Merge image files and text files into a single Markdown file.

    The ``.png`` files in *image_folder* and the ``.txt`` files in
    *text_folder* whose names start with *basename* are sorted in natural
    (numeric) order, so ``block_2`` comes before ``block_10``, and are then
    paired by position. For every pair a section is written containing a
    heading, an image link and the stripped text content.

    Parameters:
    - image_folder: directory containing the image files
    - text_folder: directory containing the text files
    - basename: common prefix of the image and text files (e.g. "block_")
    - md_filename: name of the Markdown file to write

    Returns None. If the number of images and texts differs, a warning is
    printed and no output file is written.
    """
    # A plain sorted() is lexicographic ("block_10" < "block_2"); use a
    # natural sort so the sections follow the numeric block order.
    image_files = _list_numbered_files(image_folder, basename, '.png')
    text_files = _list_numbered_files(text_folder, basename, '.txt')

    # Files are paired by position, so the two lists must be the same length.
    if len(image_files) != len(text_files):
        print("警告：图像文件和文本文件数量不一致！请检查目录内容。")
        return

    # Create and fill the Markdown file.
    with open(md_filename, 'w', encoding='utf-8') as md_file:
        for img_file, txt_file in zip(image_files, text_files):
            # Markdown link targets use forward slashes; os.path.join would
            # produce backslashes on Windows (this is a no-op on POSIX).
            img_path = os.path.join(image_folder, img_file).replace(os.sep, '/')
            txt_path = os.path.join(text_folder, txt_file)

            # Read the text that accompanies this image.
            with open(txt_path, 'r', encoding='utf-8') as f:
                text_content = f.read().strip()

            # Emit one section: heading, image, text, horizontal rule.
            md_file.write(f'### {img_file}\n\n')
            md_file.write(f'![{img_file}]({img_path})\n\n')
            md_file.write(f'**文本内容**:\n\n{text_content}\n\n---\n\n')

    print(f"Markdown 文件已生成: {md_filename}")
def main():
    """Run make_markdown with the script's built-in default locations."""
    # Positional order expected by make_markdown:
    # (image directory, text directory, shared file prefix, output Markdown file)
    default_args = ('output_split_1', 'ocr_output_1', 'block_', 'output.md')
    make_markdown(*default_args)
# Script entry point: run main() only when executed directly, not on import.
if __name__ == "__main__":
    main()