@@ -12,6 +12,7 @@ import {
12
12
SDKInitOptions ,
13
13
} from "../types" ;
14
14
import {
15
+ getOptimalSplitSize ,
15
16
getSplitPdfConcurrencyLevel ,
16
17
getStartingPageNumber ,
17
18
loadPdf ,
@@ -83,38 +84,76 @@ export class SplitPdfHook
83
84
( formData . get ( PARTITION_FORM_SPLIT_PDF_PAGE_KEY ) as string ) ?? "false"
84
85
) ;
85
86
const file = formData . get ( PARTITION_FORM_FILES_KEY ) as File | null ;
86
- const startingPageNumber = getStartingPageNumber ( formData ) ;
87
87
88
88
if ( ! splitPdfPage ) {
89
+ console . info ( "Partitioning without split." )
89
90
return request ;
90
91
}
91
92
93
+ console . info ( "Preparing to split document for partition." )
92
94
if ( ! this . client ) {
93
- console . warn ( "HTTP client not accessible! Continuing without splitting ." ) ;
95
+ console . warn ( "HTTP client not accessible! Partitioning without split ." ) ;
94
96
return request ;
95
97
}
96
98
97
99
const [ error , pdf , pagesCount ] = await loadPdf ( file ) ;
98
100
if ( file === null || pdf === null || error ) {
101
+ console . warn ( "File could not be split. Partitioning without split." )
99
102
return request ;
100
103
}
101
104
102
105
if ( pagesCount < MIN_PAGES_PER_THREAD ) {
103
106
console . warn (
104
- `PDF has less than ${ MIN_PAGES_PER_THREAD } pages. Continuing without splitting .`
107
+ `PDF has less than ${ MIN_PAGES_PER_THREAD } pages. Partitioning without split .`
105
108
) ;
106
109
return request ;
107
110
}
108
111
112
+ const startingPageNumber = getStartingPageNumber ( formData ) ;
113
+ console . info ( "Starting page number set to %d" , startingPageNumber ) ;
114
+
109
115
const concurrencyLevel = getSplitPdfConcurrencyLevel ( formData ) ;
110
- const splits = await splitPdf ( pdf , concurrencyLevel ) ;
116
+ console . info ( "Concurrency level set to %d" , concurrencyLevel )
117
+
118
+ const splitSize = await getOptimalSplitSize ( pagesCount , concurrencyLevel ) ;
119
+ console . info ( "Determined optimal split size of %d pages." , splitSize )
120
+
121
+ if ( splitSize >= pagesCount ) {
122
+ console . warn (
123
+ "Document has too few pages (%d) to be split efficiently. Partitioning without split." ,
124
+ pagesCount ,
125
+ )
126
+ return request ;
127
+ }
128
+
129
+ const splits = await splitPdf ( pdf , splitSize ) ;
130
+ const numberOfSplits = splits . length
131
+ console . info (
132
+ "Document split into %d, %d-paged sets." ,
133
+ numberOfSplits ,
134
+ splitSize ,
135
+ )
136
+ console . info (
137
+ "Partitioning %d, %d-paged sets." ,
138
+ numberOfSplits ,
139
+ splitSize ,
140
+ )
141
+
111
142
const headers = prepareRequestHeaders ( request ) ;
112
143
113
144
const requests : Request [ ] = [ ] ;
114
145
146
+ let setIndex = 1
115
147
for ( const { content, startPage } of splits ) {
116
148
// Both startPage and startingPageNumber are 1-based, so we need to subtract 1
117
149
const firstPageNumber = startPage + startingPageNumber - 1 ;
150
+ console . info (
151
+ "Partitioning set #%d (pages %d-%d)." ,
152
+ setIndex ,
153
+ firstPageNumber ,
154
+ Math . min ( firstPageNumber + splitSize - 1 , pagesCount ) ,
155
+ ) ;
156
+
118
157
const body = await prepareRequestBody (
119
158
formData ,
120
159
content ,
@@ -126,6 +165,7 @@ export class SplitPdfHook
126
165
body,
127
166
} ) ;
128
167
requests . push ( req ) ;
168
+ setIndex += 1 ;
129
169
}
130
170
131
171
this . partitionResponses [ operationID ] = new Array ( requests . length ) ;
@@ -173,6 +213,8 @@ export class SplitPdfHook
173
213
174
214
this . clearOperation ( operationID ) ;
175
215
216
+ console . info ( "Successfully processed the request." )
217
+
176
218
return new Response ( body , {
177
219
headers : headers ,
178
220
status : response . status ,
@@ -199,6 +241,7 @@ export class SplitPdfHook
199
241
const responses = await this . awaitAllRequests ( operationID ) ;
200
242
201
243
if ( ! responses ?. length ) {
244
+ console . error ( "Failed to process the request." ) ;
202
245
this . clearOperation ( operationID ) ;
203
246
return { response, error } ;
204
247
}
@@ -214,6 +257,7 @@ export class SplitPdfHook
214
257
} ) ;
215
258
216
259
this . clearOperation ( operationID ) ;
260
+ console . info ( "Successfully processed the request." ) ;
217
261
218
262
return { response : finalResponse , error : null } ;
219
263
}
0 commit comments