Skip to content

Commit df7ee09

Browse files
committed
refactor: clean up whitespace and remove unused functions in document parser
- Removed unnecessary whitespace in the document import dialog and document parser files for improved readability.
- Deleted unused functions related to tag normalization and item deduplication to streamline the codebase.
1 parent 2ec1b7c commit df7ee09

2 files changed

Lines changed: 12 additions & 64 deletions

File tree

src/components/features/document/document-import-dialog.tsx

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ export function DocumentImportDialog({
122122

123123
const importKey = `${file.name}:${file.size}`;
124124
const now = Date.now();
125-
125+
126126
if (
127127
importKey === lastImportKeyRef.current &&
128128
now - lastImportTimeRef.current < 5_000
@@ -131,7 +131,6 @@ export function DocumentImportDialog({
131131
event.target.value = "";
132132
return;
133133
}
134-
135134

136135
lastImportKeyRef.current = importKey;
137136
lastImportTimeRef.current = now;
@@ -161,7 +160,7 @@ export function DocumentImportDialog({
161160

162161
if (!result.success || !result.items) {
163162
if (result.error === "cancelled") return;
164-
163+
165164
const errorMsg =
166165
result.error || "Failed to extract data from document";
167166
setStatus("error");

src/lib/document/document-parser.ts

Lines changed: 10 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -87,35 +87,35 @@ export async function extractTextFromPDF(file: File): Promise<string> {
8787

8888
for (const item of items) {
8989
if (!item.str) continue;
90-
90+
9191
const y = Math.round(item.transform[5]);
92-
92+
9393
if (!lineMap.has(y)) {
9494
lineMap.set(y, []);
9595
}
96-
96+
9797
lineMap.get(y)?.push(item);
9898
}
9999

100100
const sortedYPositions = [...lineMap.keys()].sort((a, b) => b - a);
101101

102102
for (const y of sortedYPositions) {
103103
const lineItems = lineMap.get(y);
104-
104+
105105
if (!lineItems) continue;
106-
106+
107107
lineItems.sort((a, b) => a.transform[4] - b.transform[4]);
108108

109109
let lineText = "";
110110
let lastX = 0;
111-
111+
112112
for (const item of lineItems) {
113113
const x = item.transform[4];
114-
114+
115115
if (lineText && x - lastX > 5) {
116116
lineText += " ";
117117
}
118-
118+
119119
lineText += item.str;
120120
lastX = x + item.width;
121121
}
@@ -130,9 +130,9 @@ export async function extractTextFromPDF(file: File): Promise<string> {
130130

131131
if (allLinks.length > 0) {
132132
const uniqueLinks = [...new Set(allLinks)];
133-
133+
134134
fullText += "\n--- HYPERLINKS FOUND IN DOCUMENT ---\n";
135-
135+
136136
for (const link of uniqueLinks) {
137137
fullText += `${link}\n`;
138138
}
@@ -365,57 +365,6 @@ function deduplicateItems(items: ExtractedItem[]): ExtractedItem[] {
365365
return Array.from(byKey.values());
366366
}
367367

368-
function normalizeTags(tags: string[]): string[] {
369-
const seen = new Set<string>();
370-
371-
for (const tag of tags) {
372-
const normalized = tag.trim().toLowerCase();
373-
374-
if (normalized) {
375-
seen.add(normalized);
376-
}
377-
}
378-
return [...seen];
379-
}
380-
381-
function normalizeKey(item: ExtractedItem): string {
382-
const safe = (value: string) => value.trim().toLowerCase();
383-
384-
return [
385-
safe(item.label),
386-
safe(item.question),
387-
safe(item.answer),
388-
item.category,
389-
]
390-
.map((part) => part || "")
391-
.join("|");
392-
}
393-
394-
function deduplicateItems(items: ExtractedItem[]): ExtractedItem[] {
395-
const byKey = new Map<string, ExtractedItem>();
396-
397-
for (const item of items) {
398-
if (!item.label?.trim() || !item.question?.trim() || !item.answer?.trim()) {
399-
continue;
400-
}
401-
402-
const key = normalizeKey(item);
403-
const existing = byKey.get(key);
404-
405-
if (existing) {
406-
const mergedTags = normalizeTags([
407-
...(existing.tags || []),
408-
...(item.tags || []),
409-
]);
410-
byKey.set(key, { ...existing, tags: mergedTags });
411-
} else {
412-
byKey.set(key, { ...item, tags: normalizeTags(item.tags || []) });
413-
}
414-
}
415-
416-
return Array.from(byKey.values());
417-
}
418-
419368
export function convertToImportItems(
420369
items: ExtractedItem[],
421370
): DocumentImportItem[] {

0 commit comments

Comments
 (0)