# Demo_DH/Demo3.py (Harkhh/Demo_DH on GitHub)
import sys
import pyautogui
import pytesseract
from PIL import Image
import cv2
import numpy as np
import time
from PyQt5.QtWidgets import QApplication, QWidget, QPushButton, QVBoxLayout, QTextEdit, QMessageBox, QHBoxLayout, QLabel, QDesktopWidget
from PyQt5.QtCore import Qt

# =============================================================================
# IMPORTANT: change this path to the Tesseract-OCR installation path on your system.
# =============================================================================
pytesseract.pytesseract.tesseract_cmd = r'I:\Program Files\Tesseract-OCR\tesseract.exe'
# =============================================================================

# Module-level state for the mouse-driven screenshot selection.
img_cv = img_copy = None            # image buffers, unset until a capture starts
start_point = end_point = (-1, -1)  # (-1, -1) means "no corner recorded yet"
drawing = selected = False          # drag in progress / selection finished

def mouse_callback(event, x, y, flags, param):
    """Mouse callback that tracks and draws the rectangular selection box.

    Left-button press records the start corner, movement while the button is
    held redraws the box on a fresh copy of the captured image, and
    left-button release records the end corner and marks the selection done.
    ``flags`` and ``param`` are accepted but not used.
    """
    global drawing, start_point, end_point, selected, img_copy, img_cv

    cursor = (x, y)

    if event == cv2.EVENT_LBUTTONDOWN:
        # A new drag begins: remember the anchor and discard any old result.
        start_point = cursor
        drawing, selected = True, False
        return

    if event == cv2.EVENT_MOUSEMOVE:
        if not drawing:
            return
        end_point = cursor
        # Redraw from the clean image so earlier boxes do not accumulate.
        img_copy = img_cv.copy()
    elif event == cv2.EVENT_LBUTTONUP:
        end_point = cursor
        drawing, selected = False, True
    else:
        return

    # Shared tail for "move while dragging" and "release": show the green box.
    cv2.rectangle(img_copy, start_point, end_point, (0, 255, 0), 2)
    cv2.imshow("选择识别区域", img_copy)

def take_screenshot_and_select_region():
    """Capture the full screen and let the user drag out a region on it.

    Returns:
        The screenshot cropped to the dragged rectangle, or ``None`` when the
        loop ended without a usable selection (ESC pressed, corners never
        recorded, or a rectangle with zero width or height).
    """
    global img_cv, img_copy, selected, drawing, start_point, end_point

    window_title = "选择识别区域"
    unset = (-1, -1)

    # Start every capture from a clean selection state.
    drawing = selected = False
    start_point = end_point = unset

    screenshot = pyautogui.screenshot()
    # The screenshot is converted from RGB to BGR channel order for OpenCV.
    img_cv = cv2.cvtColor(np.array(screenshot), cv2.COLOR_RGB2BGR)
    img_copy = img_cv.copy()

    cv2.namedWindow(window_title, cv2.WINDOW_NORMAL)
    cv2.setWindowProperty(window_title, cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)
    cv2.setMouseCallback(window_title, mouse_callback)
    cv2.imshow(window_title, img_cv)

    # Poll until ESC (key code 27) is pressed or the callback reports a selection.
    while True:
        pressed = cv2.waitKey(1) & 0xFF
        if pressed == 27 or selected:
            break

    cv2.destroyAllWindows()

    if not selected or start_point == unset or end_point == unset:
        return None

    # Normalise the two corners so the drag may go in any direction.
    left, right = sorted((start_point[0], end_point[0]))
    top, bottom = sorted((start_point[1], end_point[1]))

    if right > left and bottom > top:
        return screenshot.crop((left, top, right, bottom))
    return None

def recognize_text_from_image(img):
    """Run Tesseract OCR on ``img`` and return the text or a status message.

    Args:
        img: Image to recognise (passed straight to
            ``pytesseract.image_to_string``), or ``None`` if nothing was
            selected.

    Returns:
        The recognised text, or a user-facing message when no image was
        given, nothing was recognised, Tesseract is missing, or OCR failed.
        This function reports errors via the returned string instead of
        raising.
    """
    if img is None:
        return "未选择图片区域。"
    try:
        # 'chi_sim+eng' asks for Simplified Chinese plus English; the matching
        # Tesseract language packs must be installed for this to work.
        text = pytesseract.image_to_string(img, lang='chi_sim+eng')
    except pytesseract.TesseractNotFoundError:
        return "错误：Tesseract-OCR未安装或未在脚本中正确配置路径。"
    except Exception as e:
        return f"识别时发生错误: {e}"

    # Output made only of whitespace / form-feed characters is truthy, so test
    # the stripped text; otherwise the "nothing recognised" message is skipped.
    if not text or not text.strip():
        return "未能识别到任何文字。"
    return text

class OCRApp(QWidget):
    """Frameless, always-on-top floating window that drives screenshot OCR.

    The window holds a hand-positioned title bar (caption plus close button),
    a button that starts a capture, and a read-only box for the OCR result.
    It can be dragged with the left mouse button from the title-bar strip.
    """

    def __init__(self):
        super().__init__()
        # Drag anchor in widget coordinates; None while no drag is active.
        # Assigned before initUI() so no handler can see a missing attribute.
        self.offset = None
        self.initUI()

    def initUI(self):
        """Build the window: flags, geometry, stylesheet, widgets and layout."""
        # Frameless floating tool window that stays on top of other windows.
        self.setWindowFlags(Qt.FramelessWindowHint | Qt.WindowStaysOnTopHint | Qt.Tool)
        self.setAttribute(Qt.WA_TranslucentBackground, False)  # opaque background

        # Place the window near the top-right corner of the screen.
        desktop = QDesktopWidget()
        screen = desktop.screenGeometry()
        self.setGeometry(screen.width() - 280, 50, 260, 220)

        # Dark theme for the window and its child widgets.
        self.setStyleSheet("""
            QWidget {
                background-color: #2b2b2b;
                border: 2px solid #3c3c3c;
                border-radius: 8px;
                color: white;
            }
            QLabel {
                background-color: transparent;
                border: none;
                color: #cccccc;
                font-weight: bold;
                padding: 4px;
            }
            QPushButton {
                background-color: #404040;
                border: 1px solid #555555;
                border-radius: 4px;
                padding: 8px;
                font-weight: bold;
            }
            QPushButton:hover {
                background-color: #505050;
                border-color: #666666;
            }
            QPushButton:pressed {
                background-color: #353535;
            }
            QTextEdit {
                background-color: #353535;
                border: 1px solid #555555;
                border-radius: 4px;
                padding: 4px;
                font-family: 'Microsoft YaHei', Arial;
                font-size: 9pt;
            }
            #closeButton {
                background-color: transparent;
                border: none;
                font-size: 18px;
                font-weight: bold;
                color: #cccccc;
                border-radius: 15px;
                min-width: 30px;
                max-width: 30px;
                min-height: 30px;
                max-height: 30px;
            }
            #closeButton:hover {
                background-color: #e81123;
                color: white;
            }
            #titleBar {
                background-color: transparent;
                border: none;
                min-height: 40px;
                max-height: 40px;
            }
        """)

        # Main vertical layout.
        main_layout = QVBoxLayout()
        main_layout.setContentsMargins(8, 8, 8, 8)
        main_layout.setSpacing(6)

        # Title bar: its children are positioned by hand, not by a layout.
        # Kept on self so update_title_bar_positions() can read its width.
        self.title_bar = QWidget(self)
        self.title_bar.setObjectName("titleBar")
        self.title_bar.setFixedHeight(40)

        # Caption text.
        self.title_label = QLabel("📷 OCR 识别工具", self.title_bar)
        self.title_label.setStyleSheet("QLabel { background: transparent; border: none; font-weight: bold; color: #cccccc; font-size: 12px; }")

        # Close button.
        self.close_button = QPushButton('×', self.title_bar)
        self.close_button.setObjectName("closeButton")
        self.close_button.setFixedSize(30, 30)
        self.close_button.clicked.connect(self.close_application)

        # Initial placement; resizeEvent() repeats it on every window resize.
        self.update_title_bar_positions()

        # Main controls.
        self.ocr_button = QPushButton('🔍 开始截图识别', self)
        self.ocr_button.clicked.connect(self.run_ocr)

        self.result_text = QTextEdit(self)
        self.result_text.setReadOnly(True)
        self.result_text.setPlaceholderText("识别结果将显示在这里...")
        self.result_text.setMaximumHeight(120)

        # Assemble the layout.
        main_layout.addWidget(self.title_bar)
        main_layout.addWidget(self.ocr_button)
        main_layout.addWidget(self.result_text)

        self.setLayout(main_layout)

    def update_title_bar_positions(self):
        """Position the caption and the close button inside the title bar."""
        self.title_label.move(10, 8)
        self.title_label.adjustSize()  # make the label fit its text
        # The close button is a child of the title bar, so its x coordinate is
        # relative to the title bar. The layout margins make the title bar
        # narrower than the window; using the window width here would push
        # the button past the title bar's right edge, where it gets clipped.
        close_x = max(0, self.title_bar.width() - self.close_button.width())
        self.close_button.move(close_x, 5)

    def resizeEvent(self, event):
        """Re-position the title-bar widgets whenever the window is resized."""
        super().resizeEvent(event)
        self.update_title_bar_positions()

    def close_application(self):
        """Quit the whole application."""
        QApplication.quit()

    def mousePressEvent(self, event):
        """Remember where the left button went down, to start a window drag."""
        if event.button() == Qt.LeftButton:
            # Dragging is only allowed from the title-bar strip at the top.
            if event.pos().y() <= 42:
                self.offset = event.pos()

    def mouseMoveEvent(self, event):
        """Move the window along with the mouse while a drag is active."""
        if self.offset is not None and event.buttons() == Qt.LeftButton:
            self.move(self.pos() + event.pos() - self.offset)

    def mouseReleaseEvent(self, event):
        """End the drag by clearing the stored anchor."""
        self.offset = None

    def run_ocr(self):
        """Let the user select a screen region, OCR it and show the result."""
        # NOTE: hiding this window before the capture (self.hide()) is
        # currently disabled, so the window can appear in the screenshot.
        # The short pause below is kept from that hide-then-capture flow.
        time.sleep(0.3)

        selected_image = take_screenshot_and_select_region()

        # Bring the window back to the front after the selection overlay closes.
        self.show()
        self.raise_()

        # Explicit None check: the helper returns None when nothing was selected.
        if selected_image is not None:
            recognized_text = recognize_text_from_image(selected_image)
            self.result_text.setText(recognized_text)
        else:
            self.result_text.setText("用户取消了截图。")

def main():
    """Create the Qt application, show the OCR window and run the event loop.

    The process exits with the status code returned by the event loop.
    """
    application = QApplication(sys.argv)
    window = OCRApp()  # keep a reference for as long as the event loop runs
    window.show()
    exit_code = application.exec_()
    sys.exit(exit_code)


if __name__ == "__main__":
    main()