File tree Expand file tree Collapse file tree 4 files changed +24
-6
lines changed
Expand file tree Collapse file tree 4 files changed +24
-6
lines changed Original file line number Diff line number Diff line change @@ -16,13 +16,12 @@ async function main() {
1616 }
1717
1818 // Path to Nike's 10-K report
19- const pdfPath = path . join ( __dirname , "../example_data/nke-10k-2023.pdf" ) ;
19+ const pdfPath = path . join ( __dirname , "../../example_data/nke-10k-2023.pdf" ) ;
2020
2121 // Initialize the loader with specific settings for large documents
2222 const loader = new MistralOcrLoader ( pdfPath , {
2323 apiKey,
2424 splitPages : true ,
25- forceSingleMode : true , // Process each page individually for reliability
2625 modelName : "mistral-ocr-latest" ,
2726 } ) ;
2827
Original file line number Diff line number Diff line change @@ -18,7 +18,10 @@ async function main() {
1818 }
1919
2020 // Path to your PDF file - using the sample file
21- const pdfPath = path . join ( __dirname , "../example_data/file-sample_150kB.pdf" ) ;
21+ const pdfPath = path . join (
22+ __dirname ,
23+ "../../example_data/file-sample_150kB.pdf"
24+ ) ;
2225
2326 // Initialize the loader
2427 const loader = new MistralOcrLoader ( pdfPath , {
Original file line number Diff line number Diff line change 11import path from "path" ;
2- import { getDocument , version } from "pdfjs-dist/legacy/build/pdf.mjs" ;
2+ // Create a factory function to dynamically import pdfjs-dist
3+ async function getPdfLib ( ) {
4+ return import ( "pdfjs-dist/legacy/build/pdf.mjs" ) ;
5+ }
6+
37// CMap and font configurations
48const CMAP_URL = path . join ( process . cwd ( ) , "node_modules/pdfjs-dist/cmaps/" ) ;
59const CMAP_PACKED = true ;
@@ -106,14 +110,17 @@ export async function parsePDF(
106110 ...options ,
107111 } ;
108112
113+ // Dynamically import pdfjs-dist
114+ const { getDocument, version } = await getPdfLib ( ) ;
115+
109116 // Initialize result object
110117 const result : PDFParseResult = {
111118 numpages : 0 ,
112119 numrender : 0 ,
113120 info : null ,
114121 metadata : null ,
115122 text : "" ,
116- version : version , // Hard-coded version as we're not importing the full pdfjsLib
123+ version : version ,
117124 } ;
118125
119126 try {
Original file line number Diff line number Diff line change 11import fs from "fs" ;
22import path from "path" ;
3- import { getDocument } from "pdfjs-dist/legacy/build/pdf.mjs" ;
43import sharp from "sharp" ;
4+ // Create a factory function to dynamically import pdfjs-dist
5+ async function getPdfLib ( ) {
6+ return import ( "pdfjs-dist/legacy/build/pdf.mjs" ) ;
7+ }
58
69// CMap and font configurations
710const CMAP_URL = path . join ( process . cwd ( ) , "node_modules/pdfjs-dist/cmaps/" ) ;
@@ -41,6 +44,9 @@ export async function convertPdfToImage(
4144 const quality = options . quality || 100 ;
4245 const pageNumber = options . pageNumber || 1 ;
4346
47+ // Dynamically import pdfjs-dist
48+ const { getDocument } = await getPdfLib ( ) ;
49+
4450 // Load the PDF document
4551 let pdfData : Uint8Array ;
4652 if ( typeof input === "string" ) {
@@ -144,6 +150,9 @@ export async function convertPdfToImages(
144150 pdfData = new Uint8Array ( input ) ;
145151 }
146152
153+ // Dynamically import pdfjs-dist
154+ const { getDocument } = await getPdfLib ( ) ;
155+
147156 const loadingTask = getDocument ( {
148157 data : pdfData ,
149158 cMapUrl : CMAP_URL ,
You can’t perform that action at this time.
0 commit comments