@@ -150,6 +150,11 @@ const imageCache = new Map<string, HTMLImageElement>();
150150
151151/** Annotations imported from the PDF file (baseline for diff computation). */
152152let pdfBaselineAnnotations : PdfAnnotationDef [ ] = [ ] ;
153+ /** Pages whose native annotations have already been imported into the baseline. */
154+ const baselineScannedPages = new Set < number > ( ) ;
155+ /** Native-annotation ids the user deleted (from restored localStorage diff) —
156+ * the lazy per-page scan must NOT re-add these to annotationMap. */
157+ const restoredRemovedIds = new Set < string > ( ) ;
153158
154159// Dirty flag — tracks unsaved local changes
155160let isDirty = false ;
@@ -2679,52 +2684,49 @@ function annotationStorageKey(): string | null {
26792684}
26802685
26812686/**
2682- * Import annotations from the loaded PDF to establish the baseline.
2683- * These are the annotations that exist in the PDF file itself.
2687+ * Import one page's native annotations into the baseline. Called lazily from
2688+ * renderPage() so we don't walk every page (and pull most of the file via
2689+ * range requests) before the user sees anything. Idempotent per page.
26842690 */
2685- async function loadBaselineAnnotations (
2686- doc : pdfjsLib . PDFDocumentProxy ,
2687- ) : Promise < void > {
2688- pdfBaselineAnnotations = [ ] ;
2689- for ( let pageNum = 1 ; pageNum <= doc . numPages ; pageNum ++ ) {
2690- try {
2691- const page = await doc . getPage ( pageNum ) ;
2692- const annotations = await page . getAnnotations ( ) ;
2693- for ( let i = 0 ; i < annotations . length ; i ++ ) {
2694- const ann = annotations [ i ] ;
2695- const def = importPdfjsAnnotation ( ann , pageNum , i ) ;
2696- if ( def ) {
2697- pdfBaselineAnnotations . push ( def ) ;
2698- // Add to annotationMap if not already present (from localStorage restore)
2699- if ( ! annotationMap . has ( def . id ) ) {
2700- annotationMap . set ( def . id , { def, elements : [ ] } ) ;
2701- }
2702- } else if ( ann . annotationType !== 20 ) {
2703- // Widget (type 20) is expected to be skipped; anything else we
2704- // don't import will still be painted by page.render() onto the
2705- // canvas as unselectable pixels. Log so we can diagnose
2706- // "ghost annotations" (visible but not in panel, not clickable).
2707- log . info (
2708- `[WARN] Baseline: skipped PDF annotation on page ${ pageNum } ` ,
2709- `type=${ ann . annotationType } ` ,
2710- `subtype=${ ann . subtype ?? "?" } ` ,
2711- `name=${ ann . name ?? "?" } ` ,
2712- `rect=${ ann . rect ? JSON . stringify ( ann . rect ) : "none" } ` ,
2713- ) ;
2714- }
2691+ function scanPageBaselineAnnotations (
2692+ pageNum : number ,
2693+ annotations : unknown [ ] ,
2694+ ) : void {
2695+ if ( baselineScannedPages . has ( pageNum ) ) return ;
2696+ baselineScannedPages . add ( pageNum ) ;
2697+ let imported = 0 ;
2698+ for ( let i = 0 ; i < annotations . length ; i ++ ) {
2699+ const ann = annotations [ i ] as {
2700+ annotationType ?: number ;
2701+ subtype ?: string ;
2702+ name ?: string ;
2703+ rect ?: number [ ] ;
2704+ } ;
2705+ const def = importPdfjsAnnotation ( ann , pageNum , i ) ;
2706+ if ( def ) {
2707+ pdfBaselineAnnotations . push ( def ) ;
2708+ imported ++ ;
2709+ if ( ! annotationMap . has ( def . id ) && ! restoredRemovedIds . has ( def . id ) ) {
2710+ annotationMap . set ( def . id , { def, elements : [ ] } ) ;
27152711 }
2716- } catch ( err ) {
2717- // Log the error — a thrown import for one annotation silently
2718- // drops the REST of that page's annotations too.
2712+ } else if ( ann . annotationType !== 20 ) {
2713+ // Widget (type 20) is expected to be skipped; anything else we
2714+ // don't import will still be painted by page.render() onto the
2715+ // canvas as unselectable pixels. Log so we can diagnose
2716+ // "ghost annotations" (visible but not in panel, not clickable).
27192717 log . info (
2720- `[WARN] Baseline: page ${ pageNum } annotation import failed:` ,
2721- err ,
2718+ `[WARN] Baseline: skipped PDF annotation on page ${ pageNum } ` ,
2719+ `type=${ ann . annotationType } ` ,
2720+ `subtype=${ ann . subtype ?? "?" } ` ,
2721+ `name=${ ann . name ?? "?" } ` ,
2722+ `rect=${ ann . rect ? JSON . stringify ( ann . rect ) : "none" } ` ,
27222723 ) ;
27232724 }
27242725 }
2725- log . info (
2726- `Loaded ${ pdfBaselineAnnotations . length } baseline annotations from PDF` ,
2727- ) ;
2726+ if ( imported > 0 ) {
2727+ updateAnnotationsBadge ( ) ;
2728+ renderAnnotationPanel ( ) ;
2729+ }
27282730}
27292731
27302732function persistAnnotations ( ) : void {
@@ -2765,11 +2767,11 @@ function restoreAnnotations(): void {
27652767 const diff = deserializeDiff ( raw ) ;
27662768
27672769 // Merge baseline + diff. The loop below is add-only, so we MUST also
2768- // delete: loadBaselineAnnotations() runs between the two restore calls
2769- // and re-seeds annotationMap with every baseline id — including the
2770- // ones in diff.removed. Without this, the zombie survives the restore,
2771- // and the next persistAnnotations() sees it in currentIds → computeDiff
2772- // produces removed=[] → the deletion is permanently lost from storage .
2770+ // delete: the per-page baseline scan seeds annotationMap with every
2771+ // native id it encounters that is not already present or tombstoned in
2772+ // restoredRemovedIds. Without the deletes here AND the tombstones added
2773+ // below, an id in diff.removed survives as a zombie, and the next
2774+ // persistAnnotations() sees it in currentIds → computeDiff produces removed=[] → the deletion is permanently lost.
27732775 const merged = mergeAnnotations ( pdfBaselineAnnotations , diff ) ;
27742776 for ( const def of merged ) {
27752777 if ( ! annotationMap . has ( def . id ) ) {
@@ -2778,6 +2780,9 @@ function restoreAnnotations(): void {
27782780 }
27792781 for ( const id of diff . removed ) {
27802782 annotationMap . delete ( id ) ;
2783+ // Tombstone so the lazy per-page baseline scan (which runs AFTER this
2784+ // restore) doesn't resurrect it.
2785+ restoredRemovedIds . add ( id ) ;
27812786 }
27822787
27832788 // Restore form fields
@@ -2869,6 +2874,14 @@ async function buildFieldNameMap(
28692874 // getFieldObjects may fail on some PDFs
28702875 }
28712876
2877+ // No AcroForm → nothing to map. Skip the per-page widget walk so form-free
2878+ // PDFs (the common large case) don't pull every page after first paint.
2879+ // getFieldObjects() itself only reads the catalog/AcroForm dict via range
2880+ // transport, so this gate is cheap.
2881+ if ( ! cachedFieldObjects || Object . keys ( cachedFieldObjects ) . length === 0 ) {
2882+ return false ;
2883+ }
2884+
28722885 // Scan every page's widget annotations to collect the CORRECT storage keys,
28732886 // plus labels, pages, positions, AND fieldValue (what the widget renders
28742887 // — which can differ from getFieldObjects().value if the PDF is internally
@@ -3362,6 +3375,9 @@ async function renderPage() {
33623375 formLayerEl . style . setProperty ( "--total-scale-factor" , `${ scale } ` ) ;
33633376 try {
33643377 const annotations = await page . getAnnotations ( ) ;
3378+ // Lazy baseline import — piggyback on the annotations we just fetched
3379+ // for this page instead of walking all pages upfront.
3380+ scanPageBaselineAnnotations ( pageToRender , annotations ) ;
33653381 if ( annotations . length > 0 ) {
33663382 const linkService = {
33673383 getDestinationHash : ( ) => "#" ,
@@ -4406,6 +4422,8 @@ async function reloadPdf(): Promise<void> {
44064422 undoStack . length = 0 ;
44074423 redoStack . length = 0 ;
44084424 pdfBaselineAnnotations = [ ] ;
4425+ baselineScannedPages . clear ( ) ;
4426+ restoredRemovedIds . clear ( ) ;
44094427 pdfBaselineFormValues . clear ( ) ;
44104428 pageTextCache . clear ( ) ;
44114429 pageTextItemsCache . clear ( ) ;
@@ -4449,11 +4467,11 @@ async function reloadPdf(): Promise<void> {
44494467 log . info ( "PDF reloaded:" , totalPages , "pages," , totalBytes , "bytes" ) ;
44504468
44514469 showViewer ( ) ;
4452- // Render immediately — annotation/form scans below are O(numPages) and
4453- // do NOT block the canvas. See same pattern in the initial-load path.
4470+ // Render immediately — baseline-annotation scan now happens per-page
4471+ // inside renderPage(); buildFieldNameMap below early-returns when no
4472+ // AcroForm is present. See same pattern in the initial-load path.
44544473 await renderPage ( ) ;
44554474
4456- await loadBaselineAnnotations ( document ) ;
44574475 const seeded = await buildFieldNameMap ( document ) ;
44584476 syncFormValuesToStorage ( ) ;
44594477 if ( seeded ) await renderPage ( ) ;
@@ -4509,6 +4527,11 @@ async function loadPdfProgressively(urlToLoad: string): Promise<{
45094527 const loadingTask = pdfjsLib . getDocument ( {
45104528 range : transport ,
45114529 standardFontDataUrl : STANDARD_FONT_DATA_URL ,
4530+ // Only fetch ranges renderPage()/getFieldObjects() actually ask for.
4531+ // Without both flags, pdfjs background-prefetches the whole file regardless
4532+ // of the per-page lazy scans below.
4533+ disableAutoFetch : true ,
4534+ disableStream : true ,
45124535 } ) ;
45134536
45144537 try {
@@ -4673,12 +4696,13 @@ app.ontoolresult = async (result: CallToolResult) => {
46734696 scale = fitScale ;
46744697 log . info ( "Initial fit scale:" , scale ) ;
46754698 }
4676- await renderPage ( ) ;
4677-
4678- // Import annotations from the PDF to establish baseline
4679- await loadBaselineAnnotations ( document ) ;
4680- // Restore any persisted user diff
4699+ // Restore any persisted user diff BEFORE first render so the per-page
4700+ // baseline scan inside renderPage() can honour the removed-id tombstones
4701+ // and not resurrect annotations the user deleted last session.
4702+ // restoreAnnotations is sync (localStorage read) so first paint is not
4703+ // delayed.
46814704 restoreAnnotations ( ) ;
4705+ await renderPage ( ) ;
46824706
46834707 // Build field name → annotation ID mapping for form filling
46844708 const seeded = await buildFieldNameMap ( document ) ;
0 commit comments