Skip to content

Commit 8ecabed

Browse files
committed
fix: use dynamic imports for pdfjs-dist to support both ESM and CommonJS environments
1 parent 3e508ed commit 8ecabed

File tree

4 files changed

+24
-6
lines changed

4 files changed

+24
-6
lines changed

examples/nike-10k.ts

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -16,13 +16,12 @@ async function main() {
1616
}
1717

1818
// Path to Nike's 10-K report
19-
const pdfPath = path.join(__dirname, "../example_data/nke-10k-2023.pdf");
19+
const pdfPath = path.join(__dirname, "../../example_data/nke-10k-2023.pdf");
2020

2121
// Initialize the loader with specific settings for large documents
2222
const loader = new MistralOcrLoader(pdfPath, {
2323
apiKey,
2424
splitPages: true,
25-
forceSingleMode: true, // Process each page individually for reliability
2625
modelName: "mistral-ocr-latest",
2726
});
2827

examples/simple.ts

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,10 @@ async function main() {
1818
}
1919

2020
// Path to your PDF file - using the sample file
21-
const pdfPath = path.join(__dirname, "../example_data/file-sample_150kB.pdf");
21+
const pdfPath = path.join(
22+
__dirname,
23+
"../../example_data/file-sample_150kB.pdf"
24+
);
2225

2326
// Initialize the loader
2427
const loader = new MistralOcrLoader(pdfPath, {

src/pdf-utils/pdf-parser/pdf-parser.ts

Lines changed: 9 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,9 @@
11
import path from "path";
2-
import { getDocument, version } from "pdfjs-dist/legacy/build/pdf.mjs";
2+
// Create a factory function to dynamically import pdfjs-dist
3+
async function getPdfLib() {
4+
return import("pdfjs-dist/legacy/build/pdf.mjs");
5+
}
6+
37
// CMap and font configurations
48
const CMAP_URL = path.join(process.cwd(), "node_modules/pdfjs-dist/cmaps/");
59
const CMAP_PACKED = true;
@@ -106,14 +110,17 @@ export async function parsePDF(
106110
...options,
107111
};
108112

113+
// Dynamically import pdfjs-dist
114+
const { getDocument, version } = await getPdfLib();
115+
109116
// Initialize result object
110117
const result: PDFParseResult = {
111118
numpages: 0,
112119
numrender: 0,
113120
info: null,
114121
metadata: null,
115122
text: "",
116-
version: version, // Hard-coded version as we're not importing the full pdfjsLib
123+
version: version,
117124
};
118125

119126
try {

src/pdf-utils/pdf-to-image.ts

Lines changed: 10 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,10 @@
11
import fs from "fs";
22
import path from "path";
3-
import { getDocument } from "pdfjs-dist/legacy/build/pdf.mjs";
43
import sharp from "sharp";
4+
// Create a factory function to dynamically import pdfjs-dist
5+
async function getPdfLib() {
6+
return import("pdfjs-dist/legacy/build/pdf.mjs");
7+
}
58

69
// CMap and font configurations
710
const CMAP_URL = path.join(process.cwd(), "node_modules/pdfjs-dist/cmaps/");
@@ -41,6 +44,9 @@ export async function convertPdfToImage(
4144
const quality = options.quality || 100;
4245
const pageNumber = options.pageNumber || 1;
4346

47+
// Dynamically import pdfjs-dist
48+
const { getDocument } = await getPdfLib();
49+
4450
// Load the PDF document
4551
let pdfData: Uint8Array;
4652
if (typeof input === "string") {
@@ -144,6 +150,9 @@ export async function convertPdfToImages(
144150
pdfData = new Uint8Array(input);
145151
}
146152

153+
// Dynamically import pdfjs-dist
154+
const { getDocument } = await getPdfLib();
155+
147156
const loadingTask = getDocument({
148157
data: pdfData,
149158
cMapUrl: CMAP_URL,

0 commit comments

Comments
 (0)