Skip to content

Commit 58853a0

Browse files
Copilot and Fdawgs authored
feat: add AbortSignal support to all pdf* functions (#742)
* Initial plan
* feat: add AbortSignal support to all pdf* functions
  Co-authored-by: Fdawgs <[email protected]>
* test: add AbortSignal tests for pdfInfo and pdfAttach
  Co-authored-by: Fdawgs <[email protected]>
* style: apply prettier formatting
  Co-authored-by: Fdawgs <[email protected]>
* fix: address code review feedback - fix spelling and pdfDetach consistency
  Co-authored-by: Fdawgs <[email protected]>
* fix: improve pdfInfo abort test reliability by aborting synchronously
  Co-authored-by: Fdawgs <[email protected]>
* fix: use setImmediate for pdfInfo abort during test to improve Windows reliability
  Co-authored-by: Fdawgs <[email protected]>
* test: use setimmediate everywhere
---------
Co-authored-by: copilot-swe-agent[bot] <[email protected]>
Co-authored-by: Fdawgs <[email protected]>
Co-authored-by: Frazer Smith <[email protected]>
1 parent fb953a1 commit 58853a0

File tree

2 files changed

+191
-23
lines changed

2 files changed

+191
-23
lines changed

src/index.js

Lines changed: 82 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -489,6 +489,11 @@ const PDF_INFO_PATH_REG = /(.+)pdfinfo/u;
489489
* @property {boolean} [printVersionInfo] Print copyright and version information.
490490
*/
491491

492+
/**
493+
* @typedef {object} PopplerExtraOptions
494+
* @property {AbortSignal} [signal] An `AbortSignal` that can be used to cancel the operation.
495+
*/
496+
492497
/**
493498
* @author Frazer Smith
494499
* @description Executes a Poppler binary with the provided arguments and file input.
@@ -500,11 +505,12 @@ const PDF_INFO_PATH_REG = /(.+)pdfinfo/u;
500505
* @param {boolean} [options.binaryOutput] - Set binary encoding for stdout.
501506
* @param {boolean} [options.ignoreExitCode] - If true, resolve based on stdout presence regardless of exit code.
502507
* @param {boolean} [options.preserveWhitespace] - If true, preserves leading and trailing whitespace in the output.
508+
* @param {AbortSignal} [options.signal] - An `AbortSignal` that can be used to cancel the operation.
503509
* @returns {Promise<string>} A promise that resolves with stdout, or rejects with an Error.
504510
*/
505511
function execBinary(binary, args, file, options = {}) {
506512
return new Promise((resolve, reject) => {
507-
const child = spawn(binary, args);
513+
const child = spawn(binary, args, { signal: options.signal });
508514

509515
if (options.binaryOutput) {
510516
child.stdout.setEncoding("binary");
@@ -517,6 +523,7 @@ function execBinary(binary, args, file, options = {}) {
517523

518524
let stdOut = "";
519525
let stdErr = "";
526+
let errorHandled = false;
520527

521528
child.stdout.on("data", (data) => {
522529
stdOut += data;
@@ -526,7 +533,17 @@ function execBinary(binary, args, file, options = {}) {
526533
stdErr += data;
527534
});
528535

536+
child.on("error", (err) => {
537+
errorHandled = true;
538+
reject(err);
539+
});
540+
529541
child.on("close", (code) => {
542+
// If an error was already emitted, don't process the close event
543+
if (errorHandled) {
544+
return;
545+
}
546+
530547
// For binaries without reliable exit codes, resolve based on stdout presence
531548
if (options.ignoreExitCode) {
532549
if (stdOut !== "") {
@@ -1214,29 +1231,35 @@ class Poppler {
12141231
* @param {string} fileToAttach - Filepath of the attachment to be embedded into the PDF file.
12151232
* @param {string} outputFile - Filepath of the file to output the results to.
12161233
* @param {PdfAttachOptions} [options] - Options to pass to pdfattach binary.
1234+
* @param {PopplerExtraOptions} [extras] - Extra options.
12171235
* @returns {Promise<string>} A promise that resolves with a stdout string, or rejects with an `Error` object.
12181236
*/
1219-
async pdfAttach(file, fileToAttach, outputFile, options = {}) {
1237+
async pdfAttach(file, fileToAttach, outputFile, options = {}, extras = {}) {
1238+
const { signal } = extras;
12201239
const acceptedOptions = this.#getAcceptedOptions("pdfAttach");
12211240
const args = parseOptions(acceptedOptions, options);
12221241
args.push(file, fileToAttach, outputFile);
12231242

1224-
return execBinary(this.#pdfAttachBin, args);
1243+
return execBinary(this.#pdfAttachBin, args, undefined, { signal });
12251244
}
12261245

12271246
/**
12281247
* @author Frazer Smith
12291248
* @description Lists or extracts embedded files (attachments) from a PDF file.
12301249
* @param {string} file - Filepath of the PDF file to read.
12311250
* @param {PdfDetachOptions} [options] - Options to pass to pdfdetach binary.
1251+
* @param {PopplerExtraOptions} [extras] - Extra options.
12321252
* @returns {Promise<string>} A promise that resolves with a stdout string, or rejects with an `Error` object.
12331253
*/
1234-
async pdfDetach(file, options = {}) {
1254+
async pdfDetach(file, options = {}, extras = {}) {
1255+
const { signal } = extras;
12351256
const acceptedOptions = this.#getAcceptedOptions("pdfDetach");
12361257
const args = parseOptions(acceptedOptions, options);
12371258
args.push(file);
12381259

1239-
const { stdout } = await execFileAsync(this.#pdfDetachBin, args);
1260+
const { stdout } = await execFileAsync(this.#pdfDetachBin, args, {
1261+
signal,
1262+
});
12401263
return stdout;
12411264
}
12421265

@@ -1245,15 +1268,17 @@ class Poppler {
12451268
* @description Lists the fonts used in a PDF file along with various information for each font.
12461269
* @param {(Buffer|string)} file - PDF file as Buffer, or filepath of the PDF file to read.
12471270
* @param {PdfFontsOptions} [options] - Options to pass to pdffonts binary.
1271+
* @param {PopplerExtraOptions} [extras] - Extra options.
12481272
* @returns {Promise<string>} A promise that resolves with a stdout string, or rejects with an `Error` object.
12491273
*/
1250-
async pdfFonts(file, options = {}) {
1274+
async pdfFonts(file, options = {}, extras = {}) {
1275+
const { signal } = extras;
12511276
const acceptedOptions = this.#getAcceptedOptions("pdfFonts");
12521277
const versionInfo = await this.#getVersion(this.#pdfFontsBin);
12531278
const args = parseOptions(acceptedOptions, options, versionInfo);
12541279
args.push(Buffer.isBuffer(file) ? "-" : file);
12551280

1256-
return execBinary(this.#pdfFontsBin, args, file);
1281+
return execBinary(this.#pdfFontsBin, args, file, { signal });
12571282
}
12581283

12591284
/**
@@ -1262,9 +1287,11 @@ class Poppler {
12621287
* @param {(Buffer|string)} file - PDF file as Buffer, or filepath of the PDF file to read.
12631288
* @param {string} [outputPrefix] - Filename prefix of output files.
12641289
* @param {PdfImagesOptions} [options] - Options to pass to pdfimages binary.
1290+
* @param {PopplerExtraOptions} [extras] - Extra options.
12651291
* @returns {Promise<string>} A promise that resolves with a stdout string, or rejects with an `Error` object.
12661292
*/
1267-
async pdfImages(file, outputPrefix, options = {}) {
1293+
async pdfImages(file, outputPrefix, options = {}, extras = {}) {
1294+
const { signal } = extras;
12681295
const acceptedOptions = this.#getAcceptedOptions("pdfImages");
12691296
const versionInfo = await this.#getVersion(this.#pdfImagesBin);
12701297
const args = parseOptions(acceptedOptions, options, versionInfo);
@@ -1275,18 +1302,20 @@ class Poppler {
12751302
args.push(outputPrefix);
12761303
}
12771304

1278-
return execBinary(this.#pdfImagesBin, args, file);
1305+
return execBinary(this.#pdfImagesBin, args, file, { signal });
12791306
}
12801307

12811308
/**
12821309
* @author Frazer Smith
12831310
* @description Prints the contents of the `Info` dictionary from a PDF file.
12841311
* @param {(Buffer|string)} file - PDF file as Buffer, or filepath of the PDF file to read.
12851312
* @param {PdfInfoOptions} [options] - Options to pass to pdfinfo binary.
1313+
* @param {PopplerExtraOptions} [extras] - Extra options.
12861314
* @returns {Promise<object|string>} A promise that resolves with a stdout string or JSON object if
12871315
* `options.printAsJson` is `true`, or rejects with an `Error` object.
12881316
*/
1289-
async pdfInfo(file, options = {}) {
1317+
async pdfInfo(file, options = {}, extras = {}) {
1318+
const { signal } = extras;
12901319
const acceptedOptions = this.#getAcceptedOptions("pdfInfo");
12911320
const versionInfo = await this.#getVersion(this.#pdfInfoBin);
12921321
const args = parseOptions(acceptedOptions, options, versionInfo);
@@ -1303,7 +1332,7 @@ class Poppler {
13031332
}
13041333

13051334
return new Promise((resolve, reject) => {
1306-
const child = spawn(this.#pdfInfoBin, args);
1335+
const child = spawn(this.#pdfInfoBin, args, { signal });
13071336

13081337
if (Buffer.isBuffer(file)) {
13091338
child.stdin.write(file);
@@ -1312,6 +1341,7 @@ class Poppler {
13121341

13131342
let stdOut = "";
13141343
let stdErr = "";
1344+
let errorHandled = false;
13151345

13161346
child.stdout.on("data", (data) => {
13171347
stdOut += data;
@@ -1321,7 +1351,17 @@ class Poppler {
13211351
stdErr += data;
13221352
});
13231353

1354+
child.on("error", (err) => {
1355+
errorHandled = true;
1356+
reject(err);
1357+
});
1358+
13241359
child.on("close", (code) => {
1360+
// If an error was already emitted, don't process the close event
1361+
if (errorHandled) {
1362+
return;
1363+
}
1364+
13251365
if (stdOut !== "") {
13261366
if (fileSize) {
13271367
stdOut = stdOut.replace(
@@ -1374,15 +1414,17 @@ class Poppler {
13741414
* since %d is replaced by the page number.
13751415
* As an example, `sample-%d.pdf` will produce `sample-1.pdf` for a single page document.
13761416
* @param {PdfSeparateOptions} [options] - Options to pass to pdfseparate binary.
1417+
* @param {PopplerExtraOptions} [extras] - Extra options.
13771418
* @returns {Promise<string>} A promise that resolves with a stdout string, or rejects with an `Error` object.
13781419
*/
1379-
async pdfSeparate(file, outputPattern, options = {}) {
1420+
async pdfSeparate(file, outputPattern, options = {}, extras = {}) {
1421+
const { signal } = extras;
13801422
const acceptedOptions = this.#getAcceptedOptions("pdfSeparate");
13811423
const versionInfo = await this.#getVersion(this.#pdfSeparateBin);
13821424
const args = parseOptions(acceptedOptions, options, versionInfo);
13831425
args.push(file, outputPattern);
13841426

1385-
return execBinary(this.#pdfSeparateBin, args);
1427+
return execBinary(this.#pdfSeparateBin, args, undefined, { signal });
13861428
}
13871429

13881430
/**
@@ -1397,9 +1439,11 @@ class Poppler {
13971439
*
13981440
* If not set then the output filename will be derived from the PDF file name.
13991441
* @param {PdfToCairoOptions} [options] - Options to pass to pdftocairo binary.
1442+
* @param {PopplerExtraOptions} [extras] - Extra options.
14001443
* @returns {Promise<string>} A promise that resolves with a stdout string, or rejects with an `Error` object.
14011444
*/
1402-
async pdfToCairo(file, outputFile, options = {}) {
1445+
async pdfToCairo(file, outputFile, options = {}, extras = {}) {
1446+
const { signal } = extras;
14031447
const acceptedOptions = this.#getAcceptedOptions("pdfToCairo");
14041448
const versionInfo = await this.#getVersion(this.#pdfToCairoBin);
14051449
const args = parseOptions(acceptedOptions, options, versionInfo);
@@ -1409,7 +1453,10 @@ class Poppler {
14091453
outputFile === undefined &&
14101454
args.some((arg) => ["-singlefile", "-pdf"].includes(arg));
14111455

1412-
return execBinary(this.#pdfToCairoBin, args, file, { binaryOutput });
1456+
return execBinary(this.#pdfToCairoBin, args, file, {
1457+
binaryOutput,
1458+
signal,
1459+
});
14131460
}
14141461

14151462
/**
@@ -1422,9 +1469,11 @@ class Poppler {
14221469
*
14231470
* Required if `file` is a Buffer.
14241471
* @param {PdfToHtmlOptions} [options] - Options to pass to pdftohtml binary.
1472+
* @param {PopplerExtraOptions} [extras] - Extra options.
14251473
* @returns {Promise<string>} A promise that resolves with a stdout string, or rejects with an `Error` object.
14261474
*/
1427-
async pdfToHtml(file, outputFile, options = {}) {
1475+
async pdfToHtml(file, outputFile, options = {}, extras = {}) {
1476+
const { signal } = extras;
14281477
const acceptedOptions = this.#getAcceptedOptions("pdfToHtml");
14291478
const versionInfo = await this.#getVersion(this.#pdfToHtmlBin);
14301479
const args = parseOptions(acceptedOptions, options, versionInfo);
@@ -1436,6 +1485,7 @@ class Poppler {
14361485

14371486
return execBinary(this.#pdfToHtmlBin, args, file, {
14381487
ignoreExitCode: true,
1488+
signal,
14391489
});
14401490
}
14411491

@@ -1447,15 +1497,17 @@ class Poppler {
14471497
* @param {(Buffer|string)} file - PDF file as Buffer, or filepath of the PDF file to read.
14481498
* @param {string} outputPath - Filepath to output the results to.
14491499
* @param {PdfToPpmOptions} [options] - Options to pass to pdftoppm binary.
1500+
* @param {PopplerExtraOptions} [extras] - Extra options.
14501501
* @returns {Promise<string>} A promise that resolves with a stdout string, or rejects with an `Error` object.
14511502
*/
1452-
async pdfToPpm(file, outputPath, options = {}) {
1503+
async pdfToPpm(file, outputPath, options = {}, extras = {}) {
1504+
const { signal } = extras;
14531505
const acceptedOptions = this.#getAcceptedOptions("pdfToPpm");
14541506
const versionInfo = await this.#getVersion(this.#pdfToPpmBin);
14551507
const args = parseOptions(acceptedOptions, options, versionInfo);
14561508
args.push(Buffer.isBuffer(file) ? "-" : file, outputPath);
14571509

1458-
return execBinary(this.#pdfToPpmBin, args, file);
1510+
return execBinary(this.#pdfToPpmBin, args, file, { signal });
14591511
}
14601512

14611513
/**
@@ -1465,15 +1517,17 @@ class Poppler {
14651517
* @param {string} [outputFile] - Filepath of the file to output the results to.
14661518
* If `undefined` then will write output to stdout.
14671519
* @param {PdfToPsOptions} [options] - Options to pass to pdftops binary.
1520+
* @param {PopplerExtraOptions} [extras] - Extra options.
14681521
* @returns {Promise<string>} A promise that resolves with a stdout string, or rejects with an `Error` object.
14691522
*/
1470-
async pdfToPs(file, outputFile, options = {}) {
1523+
async pdfToPs(file, outputFile, options = {}, extras = {}) {
1524+
const { signal } = extras;
14711525
const acceptedOptions = this.#getAcceptedOptions("pdfToPs");
14721526
const versionInfo = await this.#getVersion(this.#pdfToPsBin);
14731527
const args = parseOptions(acceptedOptions, options, versionInfo);
14741528
args.push(Buffer.isBuffer(file) ? "-" : file, outputFile || "-");
14751529

1476-
return execBinary(this.#pdfToPsBin, args, file);
1530+
return execBinary(this.#pdfToPsBin, args, file, { signal });
14771531
}
14781532

14791533
/**
@@ -1483,16 +1537,19 @@ class Poppler {
14831537
* @param {string} [outputFile] - Filepath of the file to output the results to.
14841538
* If `undefined` then will write output to stdout.
14851539
* @param {PdfToTextOptions} [options] - Options to pass to pdftotext binary.
1540+
* @param {PopplerExtraOptions} [extras] - Extra options.
14861541
* @returns {Promise<string>} A promise that resolves with a stdout string, or rejects with an `Error` object.
14871542
*/
1488-
async pdfToText(file, outputFile, options = {}) {
1543+
async pdfToText(file, outputFile, options = {}, extras = {}) {
1544+
const { signal } = extras;
14891545
const acceptedOptions = this.#getAcceptedOptions("pdfToText");
14901546
const versionInfo = await this.#getVersion(this.#pdfToTextBin);
14911547
const args = parseOptions(acceptedOptions, options, versionInfo);
14921548
args.push(Buffer.isBuffer(file) ? "-" : file, outputFile || "-");
14931549

14941550
return execBinary(this.#pdfToTextBin, args, file, {
14951551
preserveWhitespace: options.maintainLayout,
1552+
signal,
14961553
});
14971554
}
14981555

@@ -1504,15 +1561,17 @@ class Poppler {
15041561
* An entire directory of PDF files can be merged like so: `path/to/directory/*.pdf`.
15051562
* @param {string} outputFile - Filepath of the file to output the resulting merged PDF to.
15061563
* @param {PdfUniteOptions} [options] - Options to pass to pdfunite binary.
1564+
* @param {PopplerExtraOptions} [extras] - Extra options.
15071565
* @returns {Promise<string>} A promise that resolves with a stdout string, or rejects with an `Error` object.
15081566
*/
1509-
async pdfUnite(files, outputFile, options = {}) {
1567+
async pdfUnite(files, outputFile, options = {}, extras = {}) {
1568+
const { signal } = extras;
15101569
const acceptedOptions = this.#getAcceptedOptions("pdfUnite");
15111570
const versionInfo = await this.#getVersion(this.#pdfUniteBin);
15121571
const args = parseOptions(acceptedOptions, options, versionInfo);
15131572
args.push(...files, outputFile);
15141573

1515-
return execBinary(this.#pdfUniteBin, args);
1574+
return execBinary(this.#pdfUniteBin, args, undefined, { signal });
15161575
}
15171576
}
15181577

0 commit comments

Comments
 (0)