Skip to content

Commit 1cbb6d4

Browse files
authored
Merge pull request #111 from otiai10/develop
2.1.0
2 parents b31e241 + 3dc0985 commit 1cbb6d4

30 files changed

+57
-876
lines changed

README.md

+1-3
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,4 @@
1-
> :tada: v2 is released! It contains breaking changes. If you still want to use v1, please replace `github.com/otiai10/gosseract` with `github.com/otiai10/gosseract/v1/gosseract`; it is exactly the same as the v1 implementation.
2-
3-
# Gosseract-OCR
1+
# gosseract OCR
42
[![Build Status](https://travis-ci.org/otiai10/gosseract.svg?branch=master)](https://travis-ci.org/otiai10/gosseract)
53
[![codecov](https://codecov.io/gh/otiai10/gosseract/branch/master/graph/badge.svg)](https://codecov.io/gh/otiai10/gosseract)
64
[![Go Report Card](https://goreportcard.com/badge/github.com/otiai10/gosseract)](https://goreportcard.com/report/github.com/otiai10/gosseract)

all_test.go

+9-3
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ func TestClient_HTML(t *testing.T) {
7979
defer client.Close()
8080
client.SetImage("./test/data/001-gosseract.png")
8181
client.SetWhitelist("otiai10/gosseract")
82-
out, err := client.HTML()
82+
out, err := client.HOCRText()
8383
Expect(t, err).ToBe(nil)
8484

8585
tokenizer := html.NewTokenizer(strings.NewReader(out))
@@ -94,13 +94,19 @@ func TestClient_HTML(t *testing.T) {
9494
Expect(t, texts).ToBe([]string{"otiai10", "/", "gosseract"})
9595

9696
When(t, "only invalid languages are given", func(t *testing.T) {
97+
client := NewClient()
98+
defer client.Close()
9799
client.SetLanguage("foo")
98-
_, err := client.HTML()
100+
client.SetImage("./test/data/001-gosseract.png")
101+
_, err := client.HOCRText()
99102
Expect(t, err).Not().ToBe(nil)
100103
})
101104
When(t, "undefined key-value is tried to be set", func(t *testing.T) {
105+
client := NewClient()
106+
defer client.Close()
102107
client.SetVariable("foobar", "hoge")
103-
_, err := client.HTML()
108+
client.SetImage("./test/data/001-gosseract.png")
109+
_, err := client.HOCRText()
104110
Expect(t, err).Not().ToBe(nil)
105111
})
106112
}

client.go

+47-47
Original file line numberDiff line numberDiff line change
@@ -69,87 +69,87 @@ func NewClient() *Client {
6969
}
7070

7171
// Close frees the allocated API. This MUST be called for ANY client constructed by the "NewClient" function.
72-
func (c *Client) Close() (err error) {
72+
func (client *Client) Close() (err error) {
7373
// defer func() {
7474
// if e := recover(); e != nil {
7575
// err = fmt.Errorf("%v", e)
7676
// }
7777
// }()
78-
C.Free(c.api)
78+
C.Free(client.api)
7979
return err
8080
}
8181

8282
// SetImage sets the path to the image file to be processed by OCR.
83-
func (c *Client) SetImage(imagepath string) *Client {
84-
c.ImagePath = imagepath
85-
return c
83+
func (client *Client) SetImage(imagepath string) *Client {
84+
client.ImagePath = imagepath
85+
return client
8686
}
8787

8888
// SetLanguage sets languages to use. English as default.
89-
func (c *Client) SetLanguage(langs ...string) *Client {
90-
c.Languages = langs
91-
return c
89+
func (client *Client) SetLanguage(langs ...string) *Client {
90+
client.Languages = langs
91+
return client
9292
}
9393

9494
// SetWhitelist sets whitelist chars.
9595
// See official documentation for whitelist here https://github.com/tesseract-ocr/tesseract/wiki/ImproveQuality#dictionaries-word-lists-and-patterns
96-
func (c *Client) SetWhitelist(whitelist string) *Client {
97-
return c.SetVariable("tessedit_char_whitelist", whitelist)
96+
func (client *Client) SetWhitelist(whitelist string) *Client {
97+
return client.SetVariable("tessedit_char_whitelist", whitelist)
9898
}
9999

100100
// SetVariable sets parameters, representing tesseract::TessBaseAPI->SetVariable.
101101
// See official documentation here https://zdenop.github.io/tesseract-doc/classtesseract_1_1_tess_base_a_p_i.html#a2e09259c558c6d8e0f7e523cbaf5adf5
102-
func (c *Client) SetVariable(key, value string) *Client {
103-
c.Variables[key] = value
104-
return c
102+
func (client *Client) SetVariable(key, value string) *Client {
103+
client.Variables[key] = value
104+
return client
105105
}
106106

107107
// SetPageSegMode sets "Page Segmentation Mode" (PSM) to detect layout of characters.
108108
// See official documentation for PSM here https://github.com/tesseract-ocr/tesseract/wiki/ImproveQuality#page-segmentation-method
109-
func (c *Client) SetPageSegMode(mode PageSegMode) *Client {
110-
c.PageSegMode = &mode
111-
return c
109+
func (client *Client) SetPageSegMode(mode PageSegMode) *Client {
110+
client.PageSegMode = &mode
111+
return client
112112
}
113113

114114
// SetConfigFile sets the file path to config file.
115-
func (c *Client) SetConfigFile(fpath string) error {
115+
func (client *Client) SetConfigFile(fpath string) error {
116116
info, err := os.Stat(fpath)
117117
if err != nil {
118118
return err
119119
}
120120
if info.IsDir() {
121121
return fmt.Errorf("the specified config file path seems to be a directory")
122122
}
123-
c.ConfigFilePath = fpath
123+
client.ConfigFilePath = fpath
124124
return nil
125125
}
126126

127127
// It's up to the caller to free this char pointer.
128-
func (c *Client) charLangs() *C.char {
128+
func (client *Client) charLangs() *C.char {
129129
var langs *C.char
130-
if len(c.Languages) != 0 {
131-
langs = C.CString(strings.Join(c.Languages, "+"))
130+
if len(client.Languages) != 0 {
131+
langs = C.CString(strings.Join(client.Languages, "+"))
132132
}
133133
return langs
134134
}
135135

136136
// It's up to the caller to free this char pointer.
137-
func (c *Client) charConfig() *C.char {
137+
func (client *Client) charConfig() *C.char {
138138
var config *C.char
139-
if _, err := os.Stat(c.ConfigFilePath); err == nil {
140-
config = C.CString(c.ConfigFilePath)
139+
if _, err := os.Stat(client.ConfigFilePath); err == nil {
140+
config = C.CString(client.ConfigFilePath)
141141
}
142142
return config
143143
}
144144

145145
// Initialize tesseract::TessBaseAPI
146146
// TODO: add tessdata prefix
147-
func (c *Client) init() error {
148-
langs := c.charLangs()
147+
func (client *Client) init() error {
148+
langs := client.charLangs()
149149
defer C.free(unsafe.Pointer(langs))
150-
config := c.charConfig()
150+
config := client.charConfig()
151151
defer C.free(unsafe.Pointer(config))
152-
res := C.Init(c.api, nil, langs, config)
152+
res := C.Init(client.api, nil, langs, config)
153153
if res != 0 {
154154
// TODO: capture and vacuum stderr from Cgo
155155
return fmt.Errorf("failed to initialize TessBaseAPI with code %d", res)
@@ -159,59 +159,59 @@ func (c *Client) init() error {
159159

160160
// Prepare tesseract::TessBaseAPI options,
161161
// must be called after `init`.
162-
func (c *Client) prepare() error {
162+
func (client *Client) prepare() error {
163163
// Set Image by giving path
164-
imagepath := C.CString(c.ImagePath)
164+
imagepath := C.CString(client.ImagePath)
165165
defer C.free(unsafe.Pointer(imagepath))
166-
C.SetImage(c.api, imagepath)
166+
C.SetImage(client.api, imagepath)
167167

168-
for key, value := range c.Variables {
169-
if ok := c.bind(key, value); !ok {
168+
for key, value := range client.Variables {
169+
if ok := client.bind(key, value); !ok {
170170
return fmt.Errorf("failed to set variable with key(%s):value(%s)", key, value)
171171
}
172172
}
173173

174-
if c.PageSegMode != nil {
175-
mode := C.int(*c.PageSegMode)
176-
C.SetPageSegMode(c.api, mode)
174+
if client.PageSegMode != nil {
175+
mode := C.int(*client.PageSegMode)
176+
C.SetPageSegMode(client.api, mode)
177177
}
178178
return nil
179179
}
180180

181181
// Binds variable to API object.
182182
// Must be called from inside `prepare`.
183-
func (c *Client) bind(key, value string) bool {
183+
func (client *Client) bind(key, value string) bool {
184184
k, v := C.CString(key), C.CString(value)
185185
defer C.free(unsafe.Pointer(k))
186186
defer C.free(unsafe.Pointer(v))
187-
res := C.SetVariable(c.api, k, v)
187+
res := C.SetVariable(client.api, k, v)
188188
return bool(res)
189189
}
190190

191191
// Text finally initializes tesseract::TessBaseAPI, executes OCR and extracts the detected text as a string.
192-
func (c *Client) Text() (out string, err error) {
193-
if err = c.init(); err != nil {
192+
func (client *Client) Text() (out string, err error) {
193+
if err = client.init(); err != nil {
194194
return
195195
}
196-
if err = c.prepare(); err != nil {
196+
if err = client.prepare(); err != nil {
197197
return
198198
}
199-
out = C.GoString(C.UTF8Text(c.api))
200-
if c.Trim {
199+
out = C.GoString(C.UTF8Text(client.api))
200+
if client.Trim {
201201
out = strings.Trim(out, "\n")
202202
}
203203
return out, err
204204
}
205205

206206
// HOCRText finally initializes tesseract::TessBaseAPI, executes OCR and returns hOCR text.
207207
// See https://en.wikipedia.org/wiki/HOCR for more information of hOCR.
208-
func (c *Client) HTML() (out string, err error) {
209-
if err = c.init(); err != nil {
208+
func (client *Client) HOCRText() (out string, err error) {
209+
if err = client.init(); err != nil {
210210
return
211211
}
212-
if err = c.prepare(); err != nil {
212+
if err = client.prepare(); err != nil {
213213
return
214214
}
215-
out = C.GoString(C.HOCRText(c.api))
215+
out = C.GoString(C.HOCRText(client.api))
216216
return
217217
}

v1/gosseract/.samples/option/digest000.txt

-1
This file was deleted.

v1/gosseract/.samples/option/digest001.txt

-1
This file was deleted.
-7.37 KB
Binary file not shown.
-5.04 KB
Binary file not shown.
-5.99 KB
Binary file not shown.
-3.5 KB
Binary file not shown.

v1/gosseract/LICENSE

-21
This file was deleted.

v1/gosseract/README.md

-67
This file was deleted.

0 commit comments

Comments
 (0)