Skip to content

Commit 6967187

Browse files
authored
Prevent 0px left/top OCR words from rendering on the page (#1460)
1 parent cd78183 commit 6967187

File tree

1 file changed

+9
-0
lines changed

1 file changed

+9
-0
lines changed

src/plugins/plugin.text_selection.js

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,15 @@ export class TextSelectionPlugin extends BookReaderPlugin {
137137
if ($textLayers.length) return;
138138
const XMLpage = await this.getPageText(pageIndex);
139139
if (!XMLpage) return;
140+
// Some OCR words have left and top coordinates of 0; remove those words entirely to prevent odd rendering
141+
// e.g. see https://archive.org/details/illustratedbooko00robe/page/n11/mode/2up
142+
$(XMLpage).find("WORD").filter((_, ele) => {
143+
const [left, , , top] = ele.getAttribute('coords').split(",").map(parseFloat);
144+
if (left == 0 && top == 0) {
145+
console.error("Found invalid ocr word coordinates");
146+
return true;
147+
}
148+
}).remove();
140149
recursivelyAddCoords(XMLpage);
141150

142151
const totalWords = $(XMLpage).find("WORD").length;

0 commit comments

Comments
 (0)