Skip to content

Commit ab37c1e

Browse files
committed
Fixed Optical Character Recognition and added tests
1 parent c23a8de commit ab37c1e

File tree

4 files changed

+34
-24
lines changed

4 files changed

+34
-24
lines changed

src/core/operations/OpticalCharacterRecognition.mjs

+12-9
Original file line number | Diff line number | Diff line change
@@ -12,9 +12,10 @@ import { isImage } from "../lib/FileType.mjs";
1212
import { toBase64 } from "../lib/Base64.mjs";
1313
import { isWorkerEnvironment } from "../Utils.mjs";
1414

15-
import process from "process";
1615
import { createWorker } from "tesseract.js";
1716

17+
const OEM_MODES = ["Tesseract only", "LSTM only", "Tesseract/LSTM Combined"];
18+
1819
/**
1920
* Optical Character Recognition operation
2021
*/
@@ -37,6 +38,12 @@ class OpticalCharacterRecognition extends Operation {
3738
name: "Show confidence",
3839
type: "boolean",
3940
value: true
41+
},
42+
{
43+
name: "OCR Engine Mode",
44+
type: "option",
45+
value: OEM_MODES,
46+
defaultIndex: 1
4047
}
4148
];
4249
}
@@ -47,7 +54,7 @@ class OpticalCharacterRecognition extends Operation {
4754
* @returns {string}
4855
*/
4956
async run(input, args) {
50-
const [showConfidence] = args;
57+
const [showConfidence, oemChoice] = args;
5158

5259
if (!isWorkerEnvironment()) throw new OperationError("This operation only works in a browser");
5360

@@ -56,12 +63,13 @@ class OpticalCharacterRecognition extends Operation {
5663
throw new OperationError("Unsupported file type (supported: jpg,png,pbm,bmp) or no file provided");
5764
}
5865

59-
const assetDir = isWorkerEnvironment() ? `${self.docURL}/assets/` : `${process.cwd()}/src/core/vendor/`;
66+
const assetDir = `${self.docURL}/assets/`;
67+
const oem = OEM_MODES.indexOf(oemChoice);
6068

6169
try {
6270
self.sendStatusMessage("Spinning up Tesseract worker...");
6371
const image = `data:${type};base64,${toBase64(input)}`;
64-
const worker = createWorker({
72+
const worker = await createWorker("eng", oem, {
6573
workerPath: `${assetDir}tesseract/worker.min.js`,
6674
langPath: `${assetDir}tesseract/lang-data`,
6775
corePath: `${assetDir}tesseract/tesseract-core.wasm.js`,
@@ -71,11 +79,6 @@ class OpticalCharacterRecognition extends Operation {
7179
}
7280
}
7381
});
74-
await worker.load();
75-
self.sendStatusMessage(`Loading English language pack...`);
76-
await worker.loadLanguage("eng");
77-
self.sendStatusMessage("Intialising Tesseract API...");
78-
await worker.initialize("eng");
7982
self.sendStatusMessage("Finding text...");
8083
const result = await worker.recognize(image);
8184

tests/browser/02_ops.js

+19-13
Original file line number | Diff line number | Diff line change
@@ -236,7 +236,7 @@ module.exports = {
236236
// testOp(browser, "OR", "test input", "test_output");
237237
// testOp(browser, "Object Identifier to Hex", "test input", "test_output");
238238
testOpHtml(browser, "Offset checker", "test input\n\nbest input", ".hl5", "est input");
239-
// testOp(browser, "Optical Character Recognition", "test input", "test_output");
239+
testOpFile(browser, "Optical Character Recognition", "files/testocr.png", false, /This is a lot of 12 point text to test the/, [], 10000);
240240
// testOp(browser, "PEM to Hex", "test input", "test_output");
241241
// testOp(browser, "PGP Decrypt", "test input", "test_output");
242242
// testOp(browser, "PGP Decrypt and Verify", "test input", "test_output");
@@ -408,7 +408,7 @@ module.exports = {
408408
* @param {Browser} browser - Nightwatch client
409409
* @param {string|Array<string>} opName - name of operation to be tested, array for multiple ops
410410
* @param {string} input - input text for test
411-
* @param {Array<string>|Array<Array<string>>} args - arguments, nested if multiple ops
411+
* @param {Array<string>|Array<Array<string>>} [args=[]] - arguments, nested if multiple ops
412412
*/
413413
function bakeOp(browser, opName, input, args=[]) {
414414
browser.perform(function() {
@@ -425,8 +425,8 @@ function bakeOp(browser, opName, input, args=[]) {
425425
* @param {Browser} browser - Nightwatch client
426426
* @param {string|Array<string>} opName - name of operation to be tested, array for multiple ops
427427
* @param {string} input - input text
428-
* @param {string} output - expected output
429-
* @param {Array<string>|Array<Array<string>>} args - arguments, nested if multiple ops
428+
* @param {string|RegExp} output - expected output
429+
* @param {Array<string>|Array<Array<string>>} [args=[]] - arguments, nested if multiple ops
430430
*/
431431
function testOp(browser, opName, input, output, args=[]) {
432432
bakeOp(browser, opName, input, args);
@@ -440,8 +440,8 @@ function testOp(browser, opName, input, output, args=[]) {
440440
* @param {string|Array<string>} opName - name of operation to be tested, array for multiple ops
441441
* @param {string} input - input text
442442
* @param {string} cssSelector - CSS selector for HTML output
443-
* @param {string} output - expected output
444-
* @param {Array<string>|Array<Array<string>>} args - arguments, nested if multiple ops
443+
* @param {string|RegExp} output - expected output
444+
* @param {Array<string>|Array<Array<string>>} [args=[]] - arguments, nested if multiple ops
445445
*/
446446
function testOpHtml(browser, opName, input, cssSelector, output, args=[]) {
447447
bakeOp(browser, opName, input, args);
@@ -459,9 +459,9 @@ function testOpHtml(browser, opName, input, cssSelector, output, args=[]) {
459459
* @param {Browser} browser - Nightwatch client
460460
* @param {string|Array<string>} opName - name of operation to be tested, array for multiple ops
461461
* @param {string} filename - filename of image file from samples directory
462-
* @param {Array<string>|Array<Array<string>>} args - arguments, nested if multiple ops
462+
* @param {Array<string>|Array<Array<string>>} [args=[]] - arguments, nested if multiple ops
463463
*/
464-
function testOpImage(browser, opName, filename, args) {
464+
function testOpImage(browser, opName, filename, args=[]) {
465465
browser.perform(function() {
466466
console.log(`Current test: ${opName}`);
467467
});
@@ -481,11 +481,12 @@ function testOpImage(browser, opName, filename, args) {
481481
* @param {Browser} browser - Nightwatch client
482482
* @param {string|Array<string>} opName - name of operation to be tested, array for multiple ops
483483
* @param {string} filename - filename of file from samples directory
484-
* @param {string} cssSelector - CSS selector for HTML output
485-
* @param {string} output - expected output
486-
* @param {Array<string>|Array<Array<string>>} args - arguments, nested if multiple ops
484+
* @param {string|boolean} cssSelector - CSS selector for HTML output or false for normal text output
485+
* @param {string|RegExp} output - expected output
486+
* @param {Array<string>|Array<Array<string>>} [args=[]] - arguments, nested if multiple ops
487+
* @param {number} [waitWindow=1000] - The number of milliseconds to wait for the output to be correct
487488
*/
488-
function testOpFile(browser, opName, filename, cssSelector, output, args) {
489+
function testOpFile(browser, opName, filename, cssSelector, output, args=[], waitWindow=1000) {
489490
browser.perform(function() {
490491
console.log(`Current test: ${opName}`);
491492
});
@@ -494,9 +495,14 @@ function testOpFile(browser, opName, filename, cssSelector, output, args) {
494495
browser.pause(100).waitForElementVisible("#stale-indicator", 5000);
495496
utils.bake(browser);
496497

497-
if (typeof output === "string") {
498+
if (!cssSelector) {
499+
// Text output
500+
utils.expectOutput(browser, output, true, waitWindow);
501+
} else if (typeof output === "string") {
502+
// HTML output - string match
498503
browser.expect.element("#output-html " + cssSelector).text.that.equals(output);
499504
} else if (output instanceof RegExp) {
505+
// HTML output - RegEx match
500506
browser.expect.element("#output-html " + cssSelector).text.that.matches(output);
501507
}
502508
}

tests/browser/browserUtils.js

+3-2
Original file line number | Diff line number | Diff line change
@@ -180,15 +180,16 @@ function loadRecipe(browser, opName, input, args) {
180180
* @param {Browser} browser - Nightwatch client
181181
* @param {string|RegExp} expected - The expected output value
182182
* @param {boolean} [waitNotNull=false] - Wait for the output to not be empty before testing the value
183+
* @param {number} [waitWindow=1000] - The number of milliseconds to wait for the output to be correct
183184
*/
184-
function expectOutput(browser, expected, waitNotNull=false) {
185+
function expectOutput(browser, expected, waitNotNull=false, waitWindow=1000) {
185186
if (waitNotNull && expected !== "") {
186187
browser.waitUntil(async function() {
187188
const output = await this.execute(function() {
188189
return window.app.manager.output.outputEditorView.state.doc.toString();
189190
});
190191
return output.length;
191-
}, 1000);
192+
}, waitWindow);
192193
}
193194

194195
browser.execute(expected => {

tests/samples/files/testocr.png

22.8 KB
Loading

0 commit comments

Comments
 (0)