feat: add rotation angle recognition for rotated CAPTCHA #259

Open · wants to merge 1 commit into base: master
Changes from all commits
3 changes: 2 additions & 1 deletion MANIFEST.in
@@ -1,3 +1,4 @@
recursive-include ddddocr common.onnx
recursive-include ddddocr common_old.onnx
recursive-include ddddocr common_det.onnx
recursive-include ddddocr common_rot.onnx
46 changes: 40 additions & 6 deletions README.md
@@ -37,7 +37,7 @@ DdddOcr、最简依赖的理念,尽量减少用户的配置和使用成本,

</p>


## Table of Contents

- [Sponsors](#赞助合作商)
@@ -112,6 +112,7 @@ ddddocr
│ │── __init__.py main library code
│ │── common.onnx new OCR model
│ │── common_det.onnx object detection model
│ │── common_rot.onnx rotated-image model
│ │── common_old.onnx old OCR model
│ │── logo.png
│ │── README.md
@@ -258,7 +259,7 @@ cv2.imwrite("result.jpg", im)
res = det.slide_match(target_bytes, background_bytes)

print(res)
```
Because the slider image may have a transparent border, the computed result is not necessarily accurate. You need to estimate the width of the slider's transparent border yourself and use it to correct the returned bbox.
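As an illustration only, here is a minimal sketch of such a correction. It assumes the result dict exposes the bbox as `res['target'] = [x1, y1, x2, y2]`, as in the project's examples; `border_width` is a placeholder you would measure from your own slider image.

```python
import ddddocr

# Minimal sketch, not part of this PR: correct the matched bbox for the
# slider's transparent border (border_width is an estimated placeholder).
det = ddddocr.DdddOcr(det=False, ocr=False, show_ad=False)

with open('target.png', 'rb') as f:
    target_bytes = f.read()
with open('background.png', 'rb') as f:
    background_bytes = f.read()

res = det.slide_match(target_bytes, background_bytes)

border_width = 6  # hypothetical: measured width of the transparent border, in pixels
x1, y1, x2, y2 = res['target']
slide_distance = x1 - border_width  # drag distance corrected for the border
print(slide_distance)
```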

*Tip: if the slider does not have much extra background, you can add the simple_target parameter; the target is usually a jpg or bmp image.*
@@ -275,7 +276,7 @@ cv2.imwrite("result.jpg", im)
res = slide.slide_match(target_bytes, background_bytes, simple_target=True)

print(res)
```

**a. Algorithm 2**

@@ -303,7 +304,7 @@ cv2.imwrite("result.jpg", im)
res = slide.slide_comparison(target_bytes, background_bytes)

print(res)
```

##### Ⅳ. OCR probability output

@@ -365,6 +366,40 @@ print(res)

```

##### Ⅵ. Image rotation

Returns how many degrees the image needs to be rotated to be upright; a debug image of the rotated result can optionally be saved.

```python
import ddddocr
import time

rot = ddddocr.DdddOcr(rot=True)
with open("test.jpg", 'rb') as f:
image = f.read()

runs = 100
start = time.time()
for _ in range(runs):
degree = rot.rotate(image, save_rot=False)
end = time.time()
total_time = end - start
qps = runs / total_time
print(f"ONNX Runtime (CPU) - QPS: {qps:.2f}, Avg Latency: {total_time / runs * 1000:.2f} ms")

# ONNX Runtime (CPU) - QPS: 34.34, Avg Latency: 29.12 ms # with image pre-processing and post-processing (saving)
# ONNX Runtime (CPU) - QPS: 38.39, Avg Latency: 26.05 ms # with image pre-processing, without post-processing (saving)
```
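Beyond the benchmark above, a minimal usage sketch (not part of this PR) that rotates the original image back with Pillow; the negative sign mirrors the clockwise `cv2.getRotationMatrix2D(center, -degree, 1.0)` call that `save_rot=True` uses internally.

```python
import io
import ddddocr
from PIL import Image

rot = ddddocr.DdddOcr(rot=True, show_ad=False)

with open("test.jpg", "rb") as f:
    img_bytes = f.read()

degree = rot.rotate(img_bytes)  # angle in degrees suggested by the model
print(f"rotate by {degree:.1f} degrees")

# PIL's Image.rotate() treats positive angles as counter-clockwise, so pass
# -degree to rotate clockwise, matching the save_rot=True behaviour above.
upright = Image.open(io.BytesIO(img_bytes)).rotate(-degree, fillcolor=(255, 255, 255))
upright.save("upright.jpg")
```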

**Example images**

Including but not limited to the following images

![Original image](images/test.jpg)

![After rotation](images/debug.jpg)


### Version Control

This project uses Git for version management. You can check the currently available versions in the repository.
@@ -388,7 +423,7 @@ print(res)
### Author

[email protected]

<img src="https://cdn.wenanzhe.com/img/mmqrcode1640418911274.png!/scale/50" alt="wechat" width="150">

*Friend requests may not always be accepted because of the large number of contacts; questions can be raised in an issue instead.*
@@ -421,4 +456,3 @@ [email protected]

[![Star History Chart](https://api.star-history.com/svg?repos=sml2h3/ddddocr&type=Date)](https://star-history.com/#sml2h3/ddddocr&Date)


78 changes: 70 additions & 8 deletions ddddocr/__init__.py
@@ -11,10 +11,11 @@
from PIL import Image, ImageChops
import numpy as np
import cv2

import math

onnxruntime.set_default_logger_severity(3)


def base64_to_image(img_base64):
img_data = base64.b64decode(img_base64)
return Image.open(io.BytesIO(img_data))
@@ -39,7 +40,7 @@ def png_rgba_black_preprocess(img: Image):


class DdddOcr(object):
def __init__(self, ocr: bool = True, det: bool = False, old: bool = False, beta: bool = False,
def __init__(self, ocr: bool = True, det: bool = False, rot: bool = False, old: bool = False, beta: bool = False,
use_gpu: bool = False,
device_id: int = 0, show_ad=True, import_onnx_path: str = "", charsets_path: str = ""):
if show_ad:
@@ -54,11 +55,12 @@ def __init__(self, ocr: bool = True, det: bool = False, old: bool = False, beta:
self.__word = False
self.__resize = []
self.__charset_range = []
self.__valid_charset_range_index = []  # valid indices corresponding to the specified charset
self.__channel = 1
if import_onnx_path != "":
det = False
ocr = False
rot = False
self.__graph_path = import_onnx_path
with open(charsets_path, 'r', encoding="utf-8") as f:
info = json.loads(f.read())
@@ -67,9 +69,14 @@ def __init__(self, ocr: bool = True, det: bool = False, old: bool = False, beta:
self.__resize = info['image']
self.__channel = info['channel']
self.use_import_onnx = True

if rot:
ocr = False
det = False
self.__graph_path = os.path.join(os.path.dirname(__file__), 'common_rot.onnx')
self.__charset = []
if det:
ocr = False
rot = False
self.__graph_path = os.path.join(os.path.dirname(__file__), 'common_det.onnx')
self.__charset = []
if ocr:
@@ -2401,6 +2408,7 @@ def __init__(self, ocr: bool = True, det: bool = False, old: bool = False, beta:
"窭", "铌",
"友", "唉", "怫", "荘"]
self.det = det
self.rot = rot
if use_gpu:
self.__providers = [
('CUDAExecutionProvider', {
@@ -2415,7 +2423,7 @@ def __init__(self, ocr: bool = True, det: bool = False, old: bool = False, beta:
self.__providers = [
'CPUExecutionProvider',
]
if ocr or det or self.use_import_onnx:
if rot or ocr or det or self.use_import_onnx:
self.__ort_session = onnxruntime.InferenceSession(self.__graph_path, providers=self.__providers)

def preproc(self, img, input_size, swap=(2, 0, 1)):
@@ -2605,7 +2613,6 @@ def set_ranges(self, charset_range):
pass
self.__valid_charset_range_index = valid_charset_range_index


def classification(self, img, png_fix: bool = False, probability=False):
if self.det:
raise TypeError("当前识别类型为目标检测")
@@ -2681,7 +2688,7 @@ def classification(self, img, png_fix: bool = False, probability=False):
valid_charset_range_index = self.__valid_charset_range_index
probability_result = []
for item in ort_outs_probability:
probability_result.append([item[i] for i in valid_charset_range_index ])
probability_result.append([item[i] for i in valid_charset_range_index])
result['probability'] = probability_result
return result
else:
@@ -2723,12 +2730,67 @@ def classification(self, img, png_fix: bool = False, probability=False):

def detection(self, img_bytes: bytes = None, img_base64: str = None):
if not self.det:
raise TypeError("当前识别类型为文字识别")
raise TypeError("当前识别类型为文字识别或图片旋转")
if not img_bytes:
img_bytes = base64.b64decode(img_base64)
result = self.get_bbox(img_bytes)
return result

def rotate(self, img_bytes: bytes = None, img_base64: str = None, save_rot: bool = False):
"""
Return the angle (in degrees) by which the image should be rotated to be upright
:param img_bytes: open("test.jpg", "rb").read()
:param img_base64:
:param save_rot: if True, save the rotated image to the project path; defaults to False
:return:
"""
# Because this library has a lot of maintainers, rotate does not call or modify other methods (such as preproc).
# Calling preproc here would mean changing preproc,
# which could cause other compatibility problems, so everything is written inside rotate.

if not self.rot:
raise TypeError("当前识别类型为文字识别或目标检测")
if not img_bytes:
img_bytes = base64.b64decode(img_base64)
image = cv2.imdecode(np.frombuffer(img_bytes, np.uint8), cv2.IMREAD_COLOR)
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
src_size, src_w = image.shape[:2]
assert src_size == src_w
# Center crop
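# (crop side = src / sqrt(2): presumably the largest centered square that stays
#  inside the image for every rotation angle, so rotated corners/borders are excluded)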
output_size = int(src_size / math.sqrt(2.0))
img = image[(image.shape[0] - output_size) // 2: (image.shape[0] + output_size) // 2,
(image.shape[1] - output_size) // 2: (image.shape[1] + output_size) // 2]
# Normalize to [0, 1]
img = np.array(img).astype(np.float32) / 255.0

# Resize to the model input size
input_size = (224, 224)
img = cv2.resize(img, input_size, interpolation=cv2.INTER_LANCZOS4)

# Standardize with ImageNet mean/std
mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
std = np.array([0.229, 0.224, 0.225], dtype=np.float32)
img = (img - mean) / std

# Convert from (H, W, C) to (C, H, W)
im = img.transpose((2, 0, 1))

ort_inputs = {self.__ort_session.get_inputs()[0].name: im[None, :, :, :]}
cls_num = self.__ort_session.get_outputs()[0].shape[1]
output = self.__ort_session.run(None, ort_inputs)
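# The model classifies the rotation into cls_num evenly spaced bins over a full
# turn; the winning bin index divided by cls_num gives the fraction of 360 degrees.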
angle = output[0].argmax(1).item() / cls_num
degree = angle * 360
if save_rot:
center = (src_size // 2, src_w // 2)
M = cv2.getRotationMatrix2D(center, -degree, 1.0)
rotated = cv2.warpAffine(
image, M, (src_w, src_size), flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_CONSTANT, borderValue=(255, 255, 255)
)
# Save the rotated image for debugging
rotated = cv2.cvtColor(rotated, cv2.COLOR_RGB2BGR)
cv2.imwrite("debug.jpg", rotated)
return degree

def get_target(self, img_bytes: bytes = None):
image = Image.open(io.BytesIO(img_bytes))
w, h = image.size
Binary file added ddddocr/common_rot.onnx
Binary file not shown.
Binary file added images/debug.jpg
Binary file not shown.
Binary file added images/test.jpg
Binary file not shown.