1
1
package com .luooqi .ocr .utils ;
2
2
3
3
import cn .hutool .core .codec .Base64 ;
4
+ import cn .hutool .core .lang .UUID ;
4
5
import cn .hutool .core .util .CharsetUtil ;
5
6
import cn .hutool .core .util .StrUtil ;
7
+ import cn .hutool .core .util .URLUtil ;
6
8
import cn .hutool .crypto .SecureUtil ;
7
9
import cn .hutool .http .HttpRequest ;
8
10
import cn .hutool .http .HttpResponse ;
12
14
import cn .hutool .json .JSONUtil ;
13
15
import com .luooqi .ocr .model .TextBlock ;
14
16
17
+ import java .awt .*;
15
18
import java .util .*;
19
+ import java .util .List ;
16
20
17
21
/**
18
22
* tools-ocr
19
23
* Created by 何志龙 on 2019-03-22.
20
24
*/
21
25
public class OcrUtils {
22
26
27
+ public static String ocrImg (byte [] imgData ) {
28
+ int i = Math .abs (UUID .randomUUID ().hashCode ()) % 4 ;
29
+ switch (i ){
30
+ case 0 :
31
+ return bdGeneralOcr (imgData );
32
+ case 1 :
33
+ return bdAccurateOcr (imgData );
34
+ case 2 :
35
+ return sogouMobileOcr (imgData );
36
+ default :
37
+ return sogouWebOcr (imgData );
38
+ }
39
+ }
40
+
41
+ private static String bdGeneralOcr (byte [] imgData ){
42
+ return bdBaseOcr (imgData , "general_location" );
43
+ }
44
+
45
+ private static String bdAccurateOcr (byte [] imgData ){
46
+ return bdBaseOcr (imgData , "https://aip.baidubce.com/rest/2.0/ocr/v1/accurate" );
47
+ }
48
+
49
+ private static String bdBaseOcr (byte [] imgData , String type ){
50
+ String [] urlArr = new String []{"http://ai.baidu.com/tech/ocr/general" , "http://ai.baidu.com/index/seccode?action=show" };
51
+ StringBuilder cookie = new StringBuilder ();
52
+ for (String url : urlArr ) {
53
+ HttpResponse cookieResp = WebUtils .get (url );
54
+ List <String > ckList = cookieResp .headerList ("Set-Cookie" );
55
+ for (String s : ckList ) {
56
+ cookie .append (s .replaceAll ("expires[\\ S\\ s]+" , "" ));
57
+ }
58
+ }
59
+ HashMap <String , String > header = new HashMap <>();
60
+ header .put ("Referer" , "http://ai.baidu.com/tech/ocr/general" );
61
+ header .put ("Cookie" , cookie .toString ());
62
+ String data = "type=" +URLUtil .encodeQuery (type )+"&detect_direction=false&image_url&image=" + URLUtil .encodeQuery ("data:image/jpeg;base64," + Base64 .encode (imgData )) + "&language_type=CHN_ENG" ;
63
+ HttpResponse response = WebUtils .postRaw ("http://ai.baidu.com/aidemo" , data , 0 , header );
64
+ return extractBdResult (WebUtils .getSafeHtml (response ));
65
+ }
66
+
23
67
public static String sogouMobileOcr (byte [] imgData ) {
24
68
String boundary = "------WebKitFormBoundary8orYTmcj8BHvQpVU" ;
25
69
String url = "http://ocr.shouji.sogou.com/v2/ocr/json" ;
@@ -59,7 +103,7 @@ private static String extractSogouResult(String html) {
59
103
}
60
104
JSONArray jsonArray = jsonObject .getJSONArray ("result" );
61
105
List <TextBlock > textBlocks = new ArrayList <>();
62
- boolean isEng = false ;
106
+ boolean isEng ;
63
107
for (int i = 0 ; i < jsonArray .size (); i ++) {
64
108
JSONObject jObj = jsonArray .getJSONObject (i );
65
109
TextBlock textBlock = new TextBlock ();
@@ -76,4 +120,34 @@ private static String extractSogouResult(String html) {
76
120
return CommUtils .combineTextBlocks (textBlocks , isEng );
77
121
}
78
122
123
+ private static String extractBdResult (String html ) {
124
+ if (StrUtil .isBlank (html )) {
125
+ return "" ;
126
+ }
127
+ JSONObject jsonObject = JSONUtil .parseObj (html );
128
+ if (jsonObject .getInt ("errno" , 0 ) != 0 ) {
129
+ return "" ;
130
+ }
131
+ JSONArray jsonArray = jsonObject .getJSONObject ("data" ).getJSONArray ("words_result" );
132
+ List <TextBlock > textBlocks = new ArrayList <>();
133
+ boolean isEng = false ;
134
+ for (int i = 0 ; i < jsonArray .size (); i ++) {
135
+ JSONObject jObj = jsonArray .getJSONObject (i );
136
+ TextBlock textBlock = new TextBlock ();
137
+ textBlock .setText (jObj .getStr ("words" ).trim ());
138
+ //noinspection SuspiciousToArrayCall
139
+ JSONObject location = jObj .getJSONObject ("location" );
140
+ int top = location .getInt ("top" );
141
+ int left = location .getInt ("left" );
142
+ int width = location .getInt ("width" );
143
+ int height = location .getInt ("height" );
144
+ textBlock .setTopLeft (new Point (top , left ));
145
+ textBlock .setTopRight (new Point (top , left + width ));
146
+ textBlock .setBottomLeft (new Point (top + height , left ));
147
+ textBlock .setBottomRight (new Point (top + height , left + width ));
148
+ textBlocks .add (textBlock );
149
+ }
150
+ return CommUtils .combineTextBlocks (textBlocks , isEng );
151
+ }
152
+
79
153
}
0 commit comments