1+ import cv2
2+ import numpy as np
3+ import easyocr
4+ import pandas as pd
5+ import requests
6+ import sys
7+ import os
8+ from collections import Counter
9+
def get_resource_path(relative_path):
    """Return the absolute path of a bundled resource file.

    Works both in a PyInstaller bundle and in a development checkout.

    Args:
        relative_path: Path of the resource relative to the base directory.

    Returns:
        ``relative_path`` joined onto the base directory: ``sys._MEIPASS``
        when that attribute exists (PyInstaller stores its temporary
        extraction folder there), otherwise the absolute form of the
        current working directory.
    """
    try:
        # Only present when running from a PyInstaller bundle.
        root = sys._MEIPASS
    except AttributeError:
        # Development run: resolve against the current working directory.
        root = os.path.abspath(".")
    return os.path.join(root, relative_path)
20+
# Module-wide EasyOCR reader, kept so the reader is only constructed once.
_ocr_reader = None


def get_ocr_reader():
    """Return the shared EasyOCR reader, constructing it on first use.

    The reader is created with the ``'ch_sim'`` and ``'en'`` language codes
    and stored in the module-level ``_ocr_reader`` so later calls reuse it.
    """
    global _ocr_reader
    if _ocr_reader is not None:
        return _ocr_reader

    # Imported here so the dependency is only needed when a reader is built.
    import easyocr
    _ocr_reader = easyocr.Reader(['ch_sim', 'en'])
    return _ocr_reader
31+
def _isolate_yellow_text(img):
    """Return a copy of the BGR image *img* with only yellow pixels kept on white."""
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    lower_yellow = np.array([20, 100, 150])
    upper_yellow = np.array([26, 255, 255])
    mask = cv2.inRange(hsv, lower_yellow, upper_yellow)

    # Pixels inside the mask keep their colour; everything else becomes white.
    white_bg = np.full_like(img, 255)
    yellow_part = cv2.bitwise_and(img, img, mask=mask)
    white_part = cv2.bitwise_and(white_bg, white_bg, mask=cv2.bitwise_not(mask))
    return cv2.add(yellow_part, white_part)


def _merge_contained_boxes(ocr_result):
    """Merge OCR fragments whose horizontal span lies inside an earlier fragment.

    *ocr_result* is an iterable of ``(bbox, text, confidence)`` triples. Only
    the x coordinates of ``bbox[0]`` and ``bbox[2]`` are compared (presumably
    the top-left and bottom-right corners -- confirm against EasyOCR); the
    vertical position is not checked. When an earlier fragment spans a later
    one, the earlier text is prepended to the later text and the earlier
    fragment is dropped. Returns the surviving texts in their original order.
    """
    items = []
    for bbox, text, _conf in ocr_result:
        x1, x2 = sorted((int(bbox[0][0]), int(bbox[2][0])))
        items.append({'x1': x1, 'x2': x2, 'text': text})

    absorbed = set()
    for i, outer in enumerate(items):
        for inner in items[i + 1:]:
            if outer['x1'] <= inner['x1'] and outer['x2'] >= inner['x2']:
                # Mutating in place lets merges chain (a -> b -> c).
                inner['text'] = outer['text'] + inner['text']
                absorbed.add(i)

    return [item['text'] for k, item in enumerate(items) if k not in absorbed]


def _fetch_wfm_prices(url_name, timeout=10):
    """Return a Counter of the ten cheapest in-game sell prices for *url_name*.

    Returns ``None`` when the request fails, times out, does not answer with
    HTTP 200, or the response lacks ``payload.orders``. An empty Counter
    means the request succeeded but no matching sell order exists.
    """
    url = f'https://api.warframe.market/v1/items/{url_name}/orders'
    headers = {
        'accept': 'application/json'
    }
    try:
        # A timeout keeps one stalled request from hanging the whole run.
        response = requests.get(url, headers=headers, timeout=timeout)
        if response.status_code != 200:
            return None
        data = response.json()
    except (requests.RequestException, ValueError):
        # Network failure or non-JSON body: treat like any other failed lookup.
        return None

    try:
        orders = data["payload"]["orders"]
    except (KeyError, TypeError):
        return None

    # Only sellers who are currently in game count as valid offers.
    sell_orders = [
        o for o in orders
        if o["order_type"] == "sell" and o["user"]["status"] == "ingame"
    ]
    cheapest = sorted(sell_orders, key=lambda o: o['platinum'])[:10]
    return Counter(o['platinum'] for o in cheapest)


def _format_price_line(label, price_counter):
    """Format one result line for *label* from a price Counter (or None/empty)."""
    if not price_counter:
        return f"{label}:无有效卖单"
    price_list = [f"{price}p×{count}人" for price, count in price_counter.items()]
    return f"{label}:{', '.join(price_list)}"


def ocr_and_search_prices(ori_img):
    """OCR the item names in an image and look up their Warframe Market prices.

    The image is reduced to its yellow text on a white background, read with
    EasyOCR, and every recognised name is mapped through
    ``wfm_item_names_en_zh.csv`` (columns ``Chinese`` and ``url_name``) to a
    Warframe Market item whose sell orders are then queried. Names are
    matched ignoring spaces and case, in this order: exact match, same
    length with exactly one differing character, and finally a catalogue
    name that equals the recognised text plus one trailing character.

    Side effect: the filtered image is written to ``yellow_on_white_ori.png``
    in the current working directory on every call.

    Args:
        ori_img: Path of the input image, or the image as a numpy array
            (assumed to be BGR, as required by the BGR->HSV conversion).

    Returns:
        list[str]: One or more human-readable result lines per recognised
        item.

    Raises:
        cv2.error: If ``ori_img`` is a path that OpenCV cannot read.
    """
    results = []

    # ---- Step 1: keep only the yellow text on a white background ----
    if isinstance(ori_img, str):
        img = cv2.imread(ori_img)
        if img is None:
            # imread signals failure by returning None instead of raising.
            raise cv2.error(f"cannot read image file: {ori_img!r}")
    else:
        img = ori_img

    final = _isolate_yellow_text(img)
    cv2.imwrite('yellow_on_white_ori.png', final)

    # ---- Step 2: OCR the filtered image and merge contained fragments ----
    # Always read the in-memory filtered image so path and array inputs go
    # through the same pipeline.
    ocr_result = get_ocr_reader().readtext(final, detail=1)
    texts = _merge_contained_boxes(ocr_result)

    # ---- Step 3: load the Chinese name -> market url_name table ----
    df_map = pd.read_csv(get_resource_path('wfm_item_names_en_zh.csv'))
    # Rows without a name or url cannot be matched and would break .lower().
    df_map = df_map.dropna(subset=['Chinese', 'url_name'])
    names_nospace = df_map['Chinese'].astype(str).str.replace(' ', '', regex=False)
    cn2url = dict(zip(names_nospace, df_map['url_name']))
    # (display name, lowercase name, url_name), built once for all lookups.
    entries = [(name, name.lower(), url) for name, url in cn2url.items()]
    lower2url = {lowered: url for _name, lowered, url in entries}

    # Remember lookups within this call so duplicate items cost one request.
    price_cache = {}

    def prices_for(url_name):
        if url_name not in price_cache:
            price_cache[url_name] = _fetch_wfm_prices(url_name)
        return price_cache[url_name]

    # ---- Step 4: match every recognised name and query its prices ----
    for zh in texts:
        # Forma items are reported as not listed instead of being looked up.
        if 'forma' in zh.lower():
            results.append(f"{zh}:未收录")
            continue

        # OCR tends to drop the last character of "...蓝图"; restore it.
        search_zh = zh + '图' if zh.endswith('蓝') else zh
        query = search_zh.replace(' ', '').lower()

        url_name = lower2url.get(query)
        if url_name:
            results.append(_format_price_line(search_zh, prices_for(url_name)))
            continue

        # Same length, exactly one differing character.
        one_char_off = [
            (name, url) for name, lowered, url in entries
            if len(lowered) == len(query)
            and sum(a != b for a, b in zip(lowered, query)) == 1
        ]
        if one_char_off:
            results.append(f"模糊搜索 '{search_zh}'结果:")
            results.extend(
                _format_price_line(f" {name}", prices_for(url))
                for name, url in one_char_off
            )
            continue

        # Catalogue name equals the recognised text plus one trailing character.
        missing_last = [
            (name, url) for name, lowered, url in entries
            if len(lowered) == len(query) + 1 and lowered[:-1] == query
        ]
        if missing_last:
            results.append(f"少一字匹配 '{search_zh}'结果:")
            results.extend(
                _format_price_line(f" {name}", prices_for(url))
                for name, url in missing_last
            )
            continue

        results.append(f"模糊搜索'{search_zh}'无匹配结果")

    return results
246+
247+
# Kept for backward compatibility: when this file is run directly, process
# the default image and print every result line.
if __name__ == "__main__":
    for line in ocr_and_search_prices('yellow_on_white_ori.png'):
        print(line)
0 commit comments