|
74 | 74 | #include <set> // for std::pair
|
75 | 75 | #include <sstream> // for std::stringstream
|
76 | 76 | #include <vector> // for std::vector
|
| 77 | +#include <cfloat> |
77 | 78 |
|
78 | 79 | #include <allheaders.h> // for pixDestroy, boxCreate, boxaAddBox, box...
|
79 | 80 | #ifdef HAVE_LIBCURL
|
@@ -102,6 +103,8 @@ static STRING_VAR(document_title, "", "Title of output document (used for hOCR a
|
102 | 103 | #ifdef HAVE_LIBCURL
|
103 | 104 | static INT_VAR(curl_timeout, 0, "Timeout for curl in seconds");
|
104 | 105 | #endif
|
| 106 | +double_VAR(allowed_image_memory_capacity, ImageCostEstimate::get_max_system_allowance(), "Set maximum memory allowance for image data: this will be used as part of a sanity check for oversized input images."); |
| 107 | + |
105 | 108 |
|
106 | 109 | /** Minimum sensible image size to be worth running tesseract. */
|
107 | 110 | const int kMinRectSize = 10;
|
@@ -271,6 +274,66 @@ void TessBaseAPI::SetInputName(const char *name) {
|
271 | 274 | input_file_ = name ? name : "";
|
272 | 275 | }
|
273 | 276 |
|
| 277 | +/** |
| 278 | +* Return a memory capacity cost estimate for the given image dimensions, based on |
| 279 | +* some heuristics about tesseract behaviour, e.g. input images will be normalized/greyscaled, |
| 280 | +* then thresholded, all of which will be kept in memory while the session runs. |
| 281 | +* |
| 282 | +* Also uses the Tesseract Variable `allowed_image_memory_capacity` to indicate |
| 283 | +* whether the estimated cost is oversized (see `cost.is_too_large()`): when that variable is > 0, an `allowance` that is <= 0 or exceeds it is replaced by the variable's value. |
| 284 | +* |
| 285 | +* For user convenience, static functions are provided: |
| 286 | +* the static functions MAY be used by userland code *before* the high cost of |
| 287 | +* instantiating a Tesseract instance is incurred. |
| 288 | +*/ |
| 289 | +ImageCostEstimate TessBaseAPI::EstimateImageMemoryCost(int image_width, int image_height, float allowance) { |
| 290 | + // The heuristics used: |
| 291 | + // |
| 292 | + // - we reckon with leptonica Pix storage at 4 bytes per pixel, |
| 293 | + // - tesseract storing (worst case) 3 different images: original, greyscale, binary thresholded, |
| 294 | + // - we DO NOT reckon with the extra image that may serve as background for PDF outputs, etc. |
| 295 | + // - we DO NOT reckon with the memory cost for the OCR match tree, etc. |
| 296 | + // However, we attempt a VERY ROUGH estimate by adding a 20% overhead for internal operations' |
| 297 | + // storage costs, i.e. (4 bytes per pixel) * (3 images) * (1.20 overhead factor) per pixel. |
| 298 | + float cost = 4 * 3 * 1.20f; |
| 299 | + cost *= image_width; |
| 300 | + cost *= image_height; |
| 301 | + |
| 302 | + if (allowed_image_memory_capacity > 0.0) { |
| 303 | + // any ridiculous `allowance` input (non-positive, or above the configured capacity) will be replaced by the Tesseract configuration value: |
| 304 | + if (allowance > allowed_image_memory_capacity || allowance <= 0.0) |
| 305 | + allowance = allowed_image_memory_capacity; |
| 306 | + } |
| 307 | + |
| 308 | + return ImageCostEstimate(cost, allowance); |
| 309 | +} |
| 310 | + |
| 311 | +ImageCostEstimate TessBaseAPI::EstimateImageMemoryCost(const Pix* pix, float allowance) { /* Overload: reads width and height from `pix` and delegates to the (width, height, allowance) estimator. */ |
| 312 | + auto w = pixGetWidth(pix); |
| 313 | + auto h = pixGetHeight(pix); |
| 314 | + return EstimateImageMemoryCost(w, h, allowance); |
| 315 | +} |
| 316 | + |
| 317 | +/** |
| 318 | +* Return the image memory cost estimate for the current instance/image: this API may be invoked |
| 319 | +* after SetInputImage() or equivalent has been called. Forwards to the `tesseract_` member, which is dereferenced unchecked. |
| 320 | +*/ |
| 321 | +ImageCostEstimate TessBaseAPI::EstimateImageMemoryCost() const { |
| 322 | + return tesseract_->EstimateImageMemoryCost(); |
| 323 | +} |
| 324 | + |
| 325 | +/** |
| 326 | +* Helper, which may be invoked after SetInputImage() or equivalent has been called: |
| 327 | +* reports the cost estimate for the current instance/image via `tprintf()` and returns |
| 328 | +* `true` when the cost is expected to be too high. The given `pix` is forwarded unchanged to the `tesseract_` member, which is dereferenced unchecked. |
| 329 | +* |
| 330 | +* You can use this as a fast pre-flight check. Many major tesseract APIs perform |
| 331 | +* this same check as part of their startup routine. |
| 332 | +*/ |
| 333 | +bool TessBaseAPI::CheckAndReportIfImageTooLarge(const Pix* pix) const { |
| 334 | + return tesseract_->CheckAndReportIfImageTooLarge(pix); |
| 335 | +} |
| 336 | + |
274 | 337 | /** Set the name of the output files. Needed only for debugging. */
|
275 | 338 | void TessBaseAPI::SetOutputName(const char *name) {
|
276 | 339 | output_file_ = name ? name : "";
|
@@ -1255,6 +1318,22 @@ bool TessBaseAPI::ProcessPage(Pix *pix, int page_index, const char *filename,
|
1255 | 1318 | TessResultRenderer *renderer) {
|
1256 | 1319 | SetInputName(filename);
|
1257 | 1320 | SetImage(pix);
|
| 1321 | + // Before we start to do *real* work, do a preliminary sanity check re expected memory pressure. |
| 1322 | + // The check MAY recur in some (semi)public APIs that MAY be called later, but this is the big one |
| 1323 | + // and it's a simple check at negligible cost, saving us some headaches when we start feeding large |
| 1324 | + // material to the Tesseract animal. |
| 1325 | + // |
| 1326 | + // TODO: rescale overlarge input images? Or is that left to userland code? (as it'll be pretty fringe anyway) |
| 1327 | + { |
| 1328 | + auto cost = TessBaseAPI::EstimateImageMemoryCost(pix); |
| 1329 | + std::string cost_report = cost; |
| 1330 | + tprintf("Estimated memory pressure: {} for input image size {} x {} px\n", cost_report, pixGetWidth(pix), pixGetHeight(pix)); |
| 1331 | + |
| 1332 | + if (CheckAndReportIfImageTooLarge(pix)) { |
| 1333 | + return false; // fail early |
| 1334 | + } |
| 1335 | + } |
| 1336 | + |
1258 | 1337 | bool failed = false;
|
1259 | 1338 |
|
1260 | 1339 | if (tesseract_->tessedit_pageseg_mode == PSM_AUTO_ONLY) {
|
|
0 commit comments