-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdecompose_large_image.py
96 lines (76 loc) · 3.26 KB
/
decompose_large_image.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
import math
import os

import cv2
import numpy as np
def process_large_image(input_path, output_folder, scale_ratio=0.2, min_ppt_area=50000):
    """Split a large image into big slide-like regions and small text snippets.

    The image is downscaled, binarised and morphologically cleaned so that
    large external contours ("PPT" regions) can be found cheaply. Each such
    region is cropped from the full-resolution image and written as
    ``ppt_<i>.png``. Those regions are then inpainted away and every remaining
    contour whose area lies strictly between 100 and 10000 pixels is written,
    with 10 px of padding, as ``text_<k>.png``.

    Args:
        input_path: Path of the image to load with ``cv2.imread``.
        output_folder: Directory that receives the PNG files; it is created
            when it does not exist yet.
        scale_ratio: Positive factor applied to width and height before the
            slide-region detection.
        min_ppt_area: Minimum contour area, expressed in full-resolution
            pixels, for a region to be treated as a slide.

    Raises:
        ValueError: If ``scale_ratio`` is not positive.
        OSError: If the image cannot be read.
    """
    if scale_ratio <= 0:
        raise ValueError(f"scale_ratio must be positive, got {scale_ratio!r}")

    # cv2.imread reports failure by returning None instead of raising.
    image = cv2.imread(input_path)
    if image is None:
        raise OSError(f"could not read image: {input_path!r}")

    # cv2.imwrite only returns False when the target directory is missing,
    # so make sure it exists up front.
    if output_folder:
        os.makedirs(output_folder, exist_ok=True)

    img_h, img_w = image.shape[:2]

    # Downscaled copy (aspect ratio preserved); clamp so tiny inputs never
    # produce a zero-sized target.
    scaled_w = max(1, int(img_w * scale_ratio))
    scaled_h = max(1, int(img_h * scale_ratio))
    scaled_img = cv2.resize(image, (scaled_w, scaled_h), interpolation=cv2.INTER_AREA)

    # Grayscale + adaptive (inverted) binarisation.
    gray = cv2.cvtColor(scaled_img, cv2.COLOR_BGR2GRAY)
    thresh = cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 21, 4
    )

    # Morphology kernel derived from the scaled image size.
    kernel_size = max(1, int(min(scaled_w, scaled_h) * 0.005))
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (kernel_size, kernel_size))

    # Close then open to suppress text-sized noise.
    cleaned = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=2)
    cleaned = cv2.morphologyEx(cleaned, cv2.MORPH_OPEN, kernel, iterations=1)

    contours, _ = cv2.findContours(cleaned, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # The area threshold is given in full-resolution pixels; areas scale with
    # the square of the linear ratio.
    scaled_min_area = min_ppt_area * scale_ratio ** 2
    ppt_contours = [c for c in contours if cv2.contourArea(c) > scaled_min_area]

    if ppt_contours:
        # One mask for all slide regions so the full image is inpainted once.
        slide_mask = np.zeros((img_h, img_w), dtype=np.uint8)
        for i, cnt in enumerate(ppt_contours):
            # Contours are (N, 1, 2) point arrays. Dividing by the scalar maps
            # them back to full resolution; multiplying by a 2x2 matrix with
            # ``*`` would broadcast element-wise to (N, 2, 2) instead.
            orig_cnt = np.round(cnt / scale_ratio).astype(np.int32)

            x, y, cw, ch = cv2.boundingRect(orig_cnt)
            # Clamp the bounding box to the image.
            x1, y1 = max(0, x), max(0, y)
            x2, y2 = min(img_w, x + cw), min(img_h, y + ch)
            if x2 > x1 and y2 > y1:
                cv2.imwrite(
                    os.path.join(output_folder, f"ppt_{i}.png"),
                    image[y1:y2, x1:x2],
                )

            cv2.drawContours(slide_mask, [orig_cnt], -1, 255, -1)

        # Remove the extracted regions before looking for text.
        image = cv2.inpaint(image, slide_mask, 3, cv2.INPAINT_TELEA)

    # Text detection on what is left of the full-resolution image.
    gray_full = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    text_thresh = cv2.adaptiveThreshold(
        gray_full, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 31, 6
    )
    text_contours, _ = cv2.findContours(text_thresh, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)

    pad = 10
    text_index = 0
    for cnt in text_contours:
        if not 100 < cv2.contourArea(cnt) < 10000:
            continue
        tx, ty, tw, th = cv2.boundingRect(cnt)
        # Pad every side by ``pad`` and clamp each edge independently so a
        # box touching one border is not over-extended on the opposite side.
        left = max(0, tx - pad)
        top = max(0, ty - pad)
        right = min(img_w, tx + tw + pad)
        bottom = min(img_h, ty + th + pad)
        cv2.imwrite(
            os.path.join(output_folder, f"text_{text_index}.png"),
            image[top:bottom, left:right],
        )
        text_index += 1
def main():
    """Decompose ``input.jpg`` into the ``output`` folder using fixed tuning values."""
    tuning = {"scale_ratio": 0.2, "min_ppt_area": 50000}
    process_large_image("input.jpg", "output", **tuning)


# Run the pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    main()