|
1 | 1 | import * as React from "react"; |
2 | | -import { useEffect, useState, useCallback, useRef } from "react"; |
| 2 | +import { useEffect, useState, useCallback, useRef, useMemo } from "react"; |
3 | 3 | import useGetKnowledgeFileApi from "@/hooks/api/useGetKnowledgeFileApi"; |
4 | 4 | import LoadingAnimation from "./loading-animation"; |
5 | 5 | import { Document, Page } from "react-pdf"; |
@@ -85,6 +85,7 @@ const PdfViewer = ({ |
85 | 85 | const [fileInfo, setFileInfo] = useState<FileInfo | null>(null); |
86 | 86 | const containerRef = useRef<HTMLDivElement>(null); |
87 | 87 | const [pdfDocument, setPdfDocument] = useState<PDFDocumentProxy | null>(null); |
| 88 | + const searchCache = useMemo(() => new Map<string, SearchResult[]>(), []); |
88 | 89 | const [searchQuery, setSearchQuery] = useState<string>(""); |
89 | 90 | const [searchResults, setSearchResults] = useState<SearchResult[]>([]); |
90 | 91 | const [currentSearchIndex, setCurrentSearchIndex] = useState<number>(-1); |
@@ -181,53 +182,80 @@ const PdfViewer = ({ |
181 | 182 | } |
182 | 183 | }; |
183 | 184 |
|
| 185 | + const handleSearchResults = (results: SearchResult[], resultIndex = 0) => { |
| 186 | + setSearchResults(results); |
| 187 | + if (results.length > 0) { |
| 188 | + setCurrentSearchIndex(resultIndex); |
| 189 | + const firstResultPage = results[resultIndex].pageIndex + 1; |
| 190 | + setCurrentPage(firstResultPage); |
| 191 | + scrollToPage(firstResultPage); |
| 192 | + } |
| 193 | + } |
| 194 | + |
| 195 | + /** |
| 196 | + * Handles the search for the given term. |
| 197 | + * @param term the search term |
| 198 | + */ |
184 | 199 | const handleSearch = async (term: string) => { |
| 200 | + // reset search results |
| 201 | + setSearchResults([]); |
| 202 | + setCurrentSearchIndex(-1); |
| 203 | + |
| 204 | + // return fast if term is empty or pdfDocument is not loaded |
185 | 205 | if (!term || !pdfDocument) { |
186 | 206 | setSearchQuery(""); |
187 | | - setSearchResults([]); |
188 | | - setCurrentSearchIndex(-1); |
189 | 207 | return; |
190 | 208 | } |
191 | 209 |
|
| 210 | + // perform search |
192 | 211 | setSearchQuery(term); |
193 | 212 |
|
194 | | - term = normalizeText(term).toLowerCase(); |
195 | | - const results: SearchResult[] = []; |
| 213 | + const normalizedTerm = normalizeText(term.toLowerCase()); |
| 214 | + const cacheKey = fileUuid + "-" + term; |
| 215 | + |
| 216 | + let results = searchCache.get(cacheKey); |
| 217 | + // Check if results are already cached |
| 218 | + if (results !== undefined) { |
| 219 | + handleSearchResults(results); |
| 220 | + |
| 221 | + return; |
| 222 | + } |
196 | 223 |
|
197 | | - async function getPageText(page: PDFPageProxy): Promise<string> { |
| 224 | + const getPageText = async (page: PDFPageProxy, pageIndex: number) => { |
198 | 225 | const textContent = await page.getTextContent(); |
199 | | - return textContent.items |
| 226 | + const pageText = textContent.items |
200 | 227 | .filter((item): item is TextItem => "str" in item) |
201 | 228 | .map((item: TextItem) => item.str) |
202 | 229 | .join(" "); |
203 | | - } |
204 | 230 |
|
205 | | - for (let pageIndex = 0; pageIndex < pdfDocument.numPages; pageIndex++) { |
206 | | - const page = await pdfDocument.getPage(pageIndex - 1); |
207 | | - const pageText = await getPageText(page); |
| 231 | + return { pageText, pageIndex }; |
| 232 | + }; |
| 233 | + |
| 234 | + // Process pages concurrently but preserve order |
| 235 | + const pagePromises = Array.from({ length: pdfDocument.numPages }, (_, i) => pdfDocument.getPage(i + 1).then(page => getPageText(page, i))); |
| 236 | + const pagesWithText = await Promise.all(pagePromises); |
208 | 237 |
|
| 238 | + results = []; |
| 239 | + |
| 240 | + pagesWithText.forEach(({ pageText, pageIndex }) => { |
| 241 | + const normalizedPageText = normalizeText(pageText).toLowerCase(); |
209 | 242 | let matchIndex = 0; |
210 | | - let index = normalizeText(pageText).toLowerCase().indexOf(term); |
| 243 | + let index = normalizedPageText.indexOf(normalizedTerm); |
211 | 244 |
|
212 | 245 | while (index !== -1) { |
213 | 246 | results.push({ |
214 | 247 | pageIndex, |
215 | 248 | matchIndex: matchIndex++, |
216 | 249 | text: pageText.substring(index, index + term.length), |
217 | 250 | }); |
218 | | - index = normalizeText(pageText) |
219 | | - .toLowerCase() |
220 | | - .indexOf(term, index + 1); |
| 251 | + index = normalizedPageText.indexOf(normalizedTerm, index + 1); |
221 | 252 | } |
222 | | - } |
| 253 | + }) |
223 | 254 |
|
224 | | - setSearchResults(results); |
225 | | - if (results.length > 0) { |
226 | | - setCurrentSearchIndex(0); |
227 | | - const firstResultPage = results[0].pageIndex + 1; |
228 | | - setCurrentPage(firstResultPage); |
229 | | - scrollToPage(firstResultPage); |
230 | | - } |
| 255 | + // Store the results in cache |
| 256 | + searchCache.set(cacheKey, results); |
| 257 | + |
| 258 | + handleSearchResults(results); |
231 | 259 | }; |
232 | 260 |
|
233 | 261 | const handleNextSearchResult = () => { |
|
0 commit comments