@@ -108,29 +108,32 @@ def __is_list_or_index_block(block):
108
108
):
109
109
multiple_para_flag = True
110
110
111
- for line in block ['lines' ]:
112
- line_mid_x = (line ['bbox' ][0 ] + line ['bbox' ][2 ]) / 2
113
- block_mid_x = (block ['bbox_fs' ][0 ] + block ['bbox_fs' ][2 ]) / 2
114
- if (
115
- line ['bbox' ][0 ] - block ['bbox_fs' ][0 ] > 0.7 * line_height
116
- and block ['bbox_fs' ][2 ] - line ['bbox' ][2 ] > 0.7 * line_height
117
- ):
118
- external_sides_not_close_num += 1
119
- if abs (line_mid_x - block_mid_x ) < line_height / 2 :
120
- center_close_num += 1
111
+ block_text = ''
121
112
113
+ for line in block ['lines' ]:
122
114
line_text = ''
123
115
124
116
for span in line ['spans' ]:
125
117
span_type = span ['type' ]
126
118
if span_type == ContentType .Text :
127
119
line_text += span ['content' ].strip ()
128
-
129
120
# 添加所有文本,包括空行,保持与block['lines']长度一致
130
121
lines_text_list .append (line_text )
131
122
block_text = '' .join (lines_text_list )
132
- block_lang = detect_lang (block_text )
133
- # logger.info(f"block_lang: {block_lang}")
123
+
124
+ block_lang = detect_lang (block_text )
125
+ # logger.info(f"block_lang: {block_lang}")
126
+
127
+ for line in block ['lines' ]:
128
+ line_mid_x = (line ['bbox' ][0 ] + line ['bbox' ][2 ]) / 2
129
+ block_mid_x = (block ['bbox_fs' ][0 ] + block ['bbox_fs' ][2 ]) / 2
130
+ if (
131
+ line ['bbox' ][0 ] - block ['bbox_fs' ][0 ] > 0.7 * line_height
132
+ and block ['bbox_fs' ][2 ] - line ['bbox' ][2 ] > 0.7 * line_height
133
+ ):
134
+ external_sides_not_close_num += 1
135
+ if abs (line_mid_x - block_mid_x ) < line_height / 2 :
136
+ center_close_num += 1
134
137
135
138
# 计算line左侧顶格数量是否大于2,是否顶格用abs(block['bbox_fs'][0] - line['bbox'][0]) < line_height/2 来判断
136
139
if abs (block ['bbox_fs' ][0 ] - line ['bbox' ][0 ]) < line_height / 2 :
0 commit comments