Skip to content

Commit 7cb4d29

Browse files
committed
Improve performance of search & Cache search results
1 parent d5c3055 commit 7cb4d29

1 file changed

Lines changed: 51 additions & 23 deletions

File tree

src/components/pdf-viewer.tsx

Lines changed: 51 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
import * as React from "react";
2-
import { useEffect, useState, useCallback, useRef } from "react";
2+
import { useEffect, useState, useCallback, useRef, useMemo } from "react";
33
import useGetKnowledgeFileApi from "@/hooks/api/useGetKnowledgeFileApi";
44
import LoadingAnimation from "./loading-animation";
55
import { Document, Page } from "react-pdf";
@@ -85,6 +85,7 @@ const PdfViewer = ({
8585
const [fileInfo, setFileInfo] = useState<FileInfo | null>(null);
8686
const containerRef = useRef<HTMLDivElement>(null);
8787
const [pdfDocument, setPdfDocument] = useState<PDFDocumentProxy | null>(null);
88+
const searchCache = useMemo(() => new Map<string, SearchResult[]>(), []);
8889
const [searchQuery, setSearchQuery] = useState<string>("");
8990
const [searchResults, setSearchResults] = useState<SearchResult[]>([]);
9091
const [currentSearchIndex, setCurrentSearchIndex] = useState<number>(-1);
@@ -181,53 +182,80 @@ const PdfViewer = ({
181182
}
182183
};
183184

185+
/**
 * Stores a list of search results in component state and, when the list is
 * non-empty, selects one result and navigates to the page that contains it.
 * When the list is empty only setSearchResults is called; the current search
 * index and the current page are left untouched by this function.
 * @param results the matches to store
 * @param resultIndex index into results of the match to select (defaults to 0)
 */
const handleSearchResults = (results: SearchResult[], resultIndex = 0) => {
186+
setSearchResults(results);
187+
if (results.length > 0) {
188+
setCurrentSearchIndex(resultIndex);
189+
const firstResultPage = results[resultIndex].pageIndex + 1; // pageIndex + 1 is the page number used below. NOTE(review): resultIndex is not range-checked; an index outside results would throw here
190+
setCurrentPage(firstResultPage);
191+
scrollToPage(firstResultPage);
192+
}
193+
}
194+
195+
/**
196+
 * Searches the text of every page of the loaded PDF for the given term and
 * publishes the matches through handleSearchResults.
 * Results are cached in searchCache under a key built from fileUuid and the
 * term, so repeating a search skips the text extraction.
 * An empty term, or a document that is not loaded yet, clears the query and
 * leaves the results empty.
197+
 * @param term the search term as entered (normalised internally before matching)
198+
 */
184199
const handleSearch = async (term: string) => {
200+
// reset search results (done on every call, before the early return and before a cache hit)
201+
setSearchResults([]);
202+
setCurrentSearchIndex(-1);
203+
204+
// return fast if term is empty or pdfDocument is not loaded
185205
if (!term || !pdfDocument) {
186206
setSearchQuery("");
187-
setSearchResults([]);
188-
setCurrentSearchIndex(-1);
189207
return;
190208
}
191209

210+
// perform search (the query stored in state is the raw term, not the normalised one)
192211
setSearchQuery(term);
193212

194-
term = normalizeText(term).toLowerCase();
195-
const results: SearchResult[] = [];
213+
const normalizedTerm = normalizeText(term.toLowerCase()); // NOTE(review): lower-cases before normalizeText, while page text below is normalised first and lower-cased second — confirm both orders give the same result
214+
const cacheKey = fileUuid + "-" + term; // NOTE(review): keyed on the raw term, so terms that normalise to the same string get separate cache entries
215+
216+
let results = searchCache.get(cacheKey);
217+
// Check if results are already cached (an empty cached array also counts as a hit)
218+
if (results !== undefined) {
219+
handleSearchResults(results);
220+
221+
return;
222+
}
196223

197-
async function getPageText(page: PDFPageProxy): Promise<string> {
224+
const getPageText = async (page: PDFPageProxy, pageIndex: number) => { // returns the page's text together with the pageIndex it was given
198225
const textContent = await page.getTextContent();
199-
return textContent.items
226+
const pageText = textContent.items
200227
.filter((item): item is TextItem => "str" in item)
201228
.map((item: TextItem) => item.str)
202229
.join(" ");
203-
}
204230

205-
for (let pageIndex = 0; pageIndex < pdfDocument.numPages; pageIndex++) {
206-
const page = await pdfDocument.getPage(pageIndex - 1);
207-
const pageText = await getPageText(page);
231+
return { pageText, pageIndex };
232+
};
233+
234+
// Process pages concurrently but preserve order: getPage receives i + 1 while the zero-based i is kept as pageIndex.
// There is no try/catch here, so a rejected page load or text extraction rejects handleSearch itself.
235+
const pagePromises = Array.from({ length: pdfDocument.numPages }, (_, i) => pdfDocument.getPage(i + 1).then(page => getPageText(page, i)));
236+
const pagesWithText = await Promise.all(pagePromises);
208237

238+
results = [];
239+
240+
pagesWithText.forEach(({ pageText, pageIndex }) => {
241+
const normalizedPageText = normalizeText(pageText).toLowerCase();
209242
let matchIndex = 0;
210-
let index = normalizeText(pageText).toLowerCase().indexOf(term);
243+
let index = normalizedPageText.indexOf(normalizedTerm);
211244

212245
while (index !== -1) {
213246
results.push({
214247
pageIndex,
215248
matchIndex: matchIndex++,
216249
text: pageText.substring(index, index + term.length), // NOTE(review): index is an offset into normalizedPageText but is applied to the raw pageText, with the raw term's length — assumes normalizeText preserves offsets and length; confirm
217250
});
218-
index = normalizeText(pageText)
219-
.toLowerCase()
220-
.indexOf(term, index + 1);
251+
index = normalizedPageText.indexOf(normalizedTerm, index + 1); // resumes one character after the last hit, so overlapping matches are reported too
221252
}
222-
}
253+
})
223254

224-
setSearchResults(results);
225-
if (results.length > 0) {
226-
setCurrentSearchIndex(0);
227-
const firstResultPage = results[0].pageIndex + 1;
228-
setCurrentPage(firstResultPage);
229-
scrollToPage(firstResultPage);
230-
}
255+
// Store the results in cache (the same array instance is then handed to handleSearchResults, so state and cache share it)
256+
searchCache.set(cacheKey, results);
257+
258+
handleSearchResults(results);
231259
};
232260

233261
const handleNextSearchResult = () => {

0 commit comments

Comments
 (0)