Skip to content

Commit efd0ea4

Browse files
authored
Add files via upload
1 parent d7a5022 commit efd0ea4

4 files changed

Lines changed: 4321 additions & 0 deletions

File tree

4.png

147 KB
Loading

ocr.py

Lines changed: 252 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,252 @@
1+
import cv2
2+
import numpy as np
3+
import easyocr
4+
import pandas as pd
5+
import requests
6+
import sys
7+
import os
8+
from collections import Counter
9+
10+
def get_resource_path(relative_path):
    """Return the absolute path of a bundled resource file.

    Works both in a PyInstaller bundle and in a normal development run.

    Args:
        relative_path: Path of the resource relative to the base directory.

    Returns:
        ``relative_path`` joined onto ``sys._MEIPASS`` when that attribute
        exists, otherwise joined onto the absolute path of the current
        working directory.
    """
    try:
        # PyInstaller unpacks bundled files into a temporary folder and
        # stores that folder's path in sys._MEIPASS.
        root_dir = sys._MEIPASS
    except AttributeError:
        # Not running from a bundle: resolve against the current working
        # directory (os.path.abspath(".")).
        root_dir = os.path.abspath(".")
    return os.path.join(root_dir, relative_path)
20+
21+
# Module-level EasyOCR reader, kept so it is only initialised once.
_ocr_reader = None


def get_ocr_reader():
    """Return the shared EasyOCR reader, creating it on first use.

    Returns:
        The cached ``easyocr.Reader`` built for the ``ch_sim`` and ``en``
        languages.
    """
    global _ocr_reader
    if _ocr_reader is not None:
        return _ocr_reader

    # Imported here so the package is only needed when a reader is built.
    import easyocr
    _ocr_reader = easyocr.Reader(['ch_sim', 'en'])
    return _ocr_reader
31+
32+
# HSV bounds passed to cv2.inRange to isolate the yellow item text.
_YELLOW_HSV_LOWER = (20, 100, 150)
_YELLOW_HSV_UPPER = (26, 255, 255)

# Seconds to wait for warframe.market before giving up on one item.
_WFM_TIMEOUT_SECONDS = 10


def _extract_yellow_on_white(img):
    """Return a copy of ``img`` keeping only in-range yellow pixels on white."""
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    mask = cv2.inRange(hsv, np.array(_YELLOW_HSV_LOWER), np.array(_YELLOW_HSV_UPPER))

    white_bg = np.ones_like(img) * 255
    yellow_part = cv2.bitwise_and(img, img, mask=mask)
    white_part = cv2.bitwise_and(white_bg, white_bg, mask=cv2.bitwise_not(mask))
    return cv2.add(yellow_part, white_part)


def _merge_contained_texts(ocr_result):
    """Merge OCR boxes by horizontal containment and return the final texts.

    When an earlier box's x-range fully contains a later box's x-range, the
    earlier text is prepended to the later one and the earlier box is
    dropped. Only x coordinates are compared.
    """
    items = []
    for bbox, text, _conf in ocr_result:
        # bbox[0] and bbox[2] are opposite corners; normalise so x1 <= x2.
        x1, x2 = sorted((int(bbox[0][0]), int(bbox[2][0])))
        items.append({'x1': x1, 'x2': x2, 'text': text})

    absorbed = set()
    for i, outer in enumerate(items):
        for inner in items[i + 1:]:
            if outer['x1'] <= inner['x1'] and outer['x2'] >= inner['x2']:
                inner['text'] = outer['text'] + inner['text']
                absorbed.add(i)

    return [item['text'] for i, item in enumerate(items) if i not in absorbed]


def _load_name_map():
    """Load the CSV mapping of space-stripped Chinese names to ``url_name``."""
    df_map = pd.read_csv(get_resource_path('wfm_item_names_en_zh.csv'))
    # Rows with a missing name or url would otherwise put NaN into the
    # mapping and crash the later ``.lower()`` calls.
    df_map = df_map.dropna(subset=['Chinese', 'url_name'])
    names = df_map['Chinese'].astype(str).str.replace(' ', '', regex=False)
    return dict(zip(names, df_map['url_name']))


def _get_wfm_prices(item_en_name):
    """Return a Counter of the 10 cheapest in-game sell prices for an item.

    Returns ``None`` when the request fails, times out, answers with a
    non-200 status, or the body is not the expected JSON shape.
    """
    url = f'https://api.warframe.market/v1/items/{item_en_name}/orders'
    headers = {
        'accept': 'application/json'
    }
    try:
        r = requests.get(url, headers=headers, timeout=_WFM_TIMEOUT_SECONDS)
    except requests.RequestException:
        # A network problem for one item must not abort the whole lookup.
        return None
    if r.status_code != 200:
        return None
    try:
        data = r.json()
    except ValueError:
        return None
    if "payload" not in data or "orders" not in data["payload"]:
        return None

    # Only sell orders whose owner is currently in game.
    orders = [
        o for o in data["payload"]["orders"]
        if o["order_type"] == "sell" and o["user"]["status"] == "ingame"
    ]
    # Ten cheapest; the Counter keeps them in ascending price order.
    orders = sorted(orders, key=lambda x: x['platinum'])[:10]
    return Counter(o['platinum'] for o in orders)


def _describe_prices(label, url_name, indent=""):
    """Return one result line for ``label`` priced via ``url_name``."""
    price_counter = _get_wfm_prices(url_name)
    if not price_counter:
        return f"{indent}{label}:无有效卖单"
    # NOTE(review): price and seller count are emitted with no separator
    # (price 10 x 2 sellers renders as "102人"); format kept as-is.
    price_list = [f"{price}{count}人" for price, count in price_counter.items()]
    return f"{indent}{label}{', '.join(price_list)}"


def ocr_and_search_prices(ori_img):
    """OCR the item names in an image and look up Warframe Market prices.

    Args:
        ori_img: Path of the input image (str) or an image as a numpy array.

    Returns:
        list: Result lines (str), one or more per recognised item name.

    Raises:
        ValueError: If the image cannot be read or no image data is given.

    Side effects:
        Writes the yellow-on-white filtered image to
        ``yellow_on_white_ori.png`` and performs HTTP requests.
    """
    results = []

    # ---- Step 1: extract the yellow text onto a white background ----
    img = cv2.imread(ori_img) if isinstance(ori_img, str) else ori_img
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise ValueError(f"cannot read image: {ori_img!r}")

    filtered = _extract_yellow_on_white(img)
    cv2.imwrite('yellow_on_white_ori.png', filtered)

    # ---- Step 2: OCR, then merge boxes by horizontal containment ----
    reader = get_ocr_reader()  # shared reader, avoids re-initialisation
    # NOTE(review): a path input is OCR'd from the file on disk, not from the
    # filtered image; only array inputs use the filtered image. Kept as-is.
    ocr_source = ori_img if isinstance(ori_img, str) else filtered
    texts = _merge_contained_texts(reader.readtext(ocr_source, detail=1))

    # ---- Step 3: name lookup tables, built once per call ----
    cn2url = _load_name_map()
    exact_map = {name.lower(): url for name, url in cn2url.items()}
    name_index = [(name, name.lower(), url) for name, url in cn2url.items()]

    # ---- Step 4: resolve each recognised name and query prices ----
    for zh in texts:
        # Anything mentioning Forma is reported as not listed.
        if 'forma' in zh.lower():
            results.append(f"{zh}:未收录")
            continue

        # A name ending in '蓝' gets '图' appended before matching.
        search_zh = zh + '图' if zh.endswith('蓝') else zh
        # Matching ignores spaces and letter case.
        query = search_zh.replace(' ', '').lower()

        en = exact_map.get(query)
        if en:
            results.append(_describe_prices(search_zh, en))
            continue

        # Fuzzy match: same length, exactly one differing character.
        one_char_off = [
            (name, url) for name, lowered, url in name_index
            if len(lowered) == len(query)
            and sum(a != b for a, b in zip(lowered, query)) == 1
        ]
        if one_char_off:
            results.append(f"模糊搜索 '{search_zh}'结果:")
            results.extend(
                _describe_prices(name, url, indent=" ") for name, url in one_char_off
            )
            continue

        # Missing-last-character match: the known name is the recognised
        # text plus exactly one trailing character.
        missing_last = [
            (name, url) for name, lowered, url in name_index
            if len(lowered) == len(query) + 1 and lowered[:-1] == query
        ]
        if missing_last:
            results.append(f"少一字匹配 '{search_zh}'结果:")
            results.extend(
                _describe_prices(name, url, indent=" ") for name, url in missing_last
            )
        else:
            results.append(f"模糊搜索'{search_zh}'无匹配结果")

    return results
246+
247+
248+
# Kept for backward compatibility: when this file is run directly, process
# the default image and print every result line.
if __name__ == "__main__":
    for line in ocr_and_search_prices('yellow_on_white_ori.png'):
        print(line)

0 commit comments

Comments
 (0)