Skip to content

Commit c68bfd6

Browse files
committed
Refactor document processing by removing Google Cloud Vision API dependency; add Node.js functions for PDF text extraction and update Firebase configuration
1 parent 33046ee commit c68bfd6

File tree

12 files changed

+3827
-394
lines changed

12 files changed

+3827
-394
lines changed

firebase.json

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,24 @@
66
"functions": [
77
{
88
"source": "functions",
9-
"codebase": "default",
9+
"codebase": "python",
1010
"ignore": [
1111
"venv",
1212
".git",
1313
"firebase-debug.log",
1414
"firebase-debug.*.log",
1515
"*.local"
1616
]
17+
},
18+
{
19+
"source": "functions-node",
20+
"codebase": "nodejs",
21+
"ignore": [
22+
"node_modules",
23+
".git",
24+
"firebase-debug.log",
25+
"firebase-debug.*.log"
26+
]
1727
}
1828
],
1929
"hosting": {

frontend/src/components/DocumentUploader.tsx

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -104,11 +104,24 @@ const DocumentUploader: React.FC = () => {
104104
const data = result.data as any;
105105

106106
if (data.success) {
107-
setProcessingStatus('Document uploaded successfully. Processing...');
107+
setProcessingStatus('Document uploaded successfully. Extracting text...');
108108

109-
// Poll for document status
110-
const documentId = data.documentId;
111-
checkDocumentStatus(documentId);
109+
// Call the new Node.js text extraction function
110+
const extractText = httpsCallable(functions, 'extractPdfText');
111+
const extractResult = await extractText({
112+
documentType: documentType
113+
});
114+
115+
const extractData = extractResult.data as any;
116+
117+
if (extractData.success) {
118+
setProcessingStatus(`Successfully extracted ${extractData.textLength} characters. Predicting grade...`);
119+
predictGrade();
120+
} else {
121+
setError('Text extraction failed: ' + (extractData.message || 'Unknown error'));
122+
setProcessingStatus('');
123+
setIsUploading(false);
124+
}
112125
} else {
113126
setError('Upload failed: ' + (data.message || 'Unknown error'));
114127
setProcessingStatus('');

functions-node/index.js

Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
const functions = require('firebase-functions');
2+
const admin = require('firebase-admin');
3+
const pdfParse = require('pdf-parse');
4+
const tmp = require('tmp');
5+
const fs = require('fs');
6+
7+
admin.initializeApp();
8+
9+
exports.extractPdfText = functions.https.onCall(async (data, context) => {
10+
// Ensure user is authenticated
11+
if (!context.auth) {
12+
throw new functions.https.HttpsError(
13+
'unauthenticated',
14+
'User must be authenticated'
15+
);
16+
}
17+
18+
const userId = context.auth.uid;
19+
const { documentType } = data;
20+
21+
if (!documentType || !['syllabus', 'transcript'].includes(documentType)) {
22+
throw new functions.https.HttpsError(
23+
'invalid-argument',
24+
'Valid document type (syllabus or transcript) is required'
25+
);
26+
}
27+
28+
try {
29+
console.log(`Starting text extraction for ${documentType} from user ${userId}`);
30+
31+
// Get document info from Firestore
32+
const db = admin.firestore();
33+
const docRef = db.collection('users').doc(userId).collection('documents').doc(documentType);
34+
const doc = await docRef.get();
35+
36+
if (!doc.exists) {
37+
throw new functions.https.HttpsError(
38+
'not-found',
39+
`${documentType} not found`
40+
);
41+
}
42+
43+
const docData = doc.data();
44+
const filePath = docData.filePath;
45+
46+
if (!filePath) {
47+
throw new functions.https.HttpsError(
48+
'not-found',
49+
`File path not found for ${documentType}`
50+
);
51+
}
52+
53+
console.log(`Downloading PDF from Firebase Storage: ${filePath}`);
54+
55+
// Download file from Firebase Storage
56+
const bucket = admin.storage().bucket();
57+
const tempFile = tmp.fileSync({ postfix: '.pdf' });
58+
59+
await bucket.file(filePath).download({
60+
destination: tempFile.name
61+
});
62+
63+
console.log(`PDF downloaded to temporary file: ${tempFile.name}`);
64+
65+
// Extract text using pdf-parse
66+
const dataBuffer = fs.readFileSync(tempFile.name);
67+
const pdfData = await pdfParse(dataBuffer);
68+
69+
console.log(`Successfully extracted ${pdfData.text.length} characters of text`);
70+
71+
// Clean up the temp file
72+
tempFile.removeCallback();
73+
74+
// Update document in Firestore with extracted text
75+
await docRef.update({
76+
text: pdfData.text,
77+
lastExtracted: admin.firestore.FieldValue.serverTimestamp(),
78+
status: 'processed'
79+
});
80+
81+
return {
82+
success: true,
83+
documentType,
84+
message: `Successfully extracted text from ${documentType}`,
85+
textLength: pdfData.text.length
86+
};
87+
} catch (error) {
88+
console.error(`Error extracting text from PDF: ${error}`);
89+
throw new functions.https.HttpsError(
90+
'internal',
91+
`Error extracting text from ${documentType}: ${error.message}`
92+
);
93+
}
94+
});
95+
96+
// Add a function that works with the PDF upload trigger
97+
exports.processPdfUpload = functions.storage.object().onFinalize(async (object) => {
98+
const filePath = object.name;
99+
100+
// Only process PDFs in the user's directory
101+
if (!filePath || !filePath.startsWith('users/') || !filePath.endsWith('.pdf')) {
102+
return null;
103+
}
104+
105+
// Extract user ID and document type from path
106+
// Expected format: users/{userId}/{documentType}/{filename}.pdf
107+
const pathParts = filePath.split('/');
108+
if (pathParts.length < 4) {
109+
return null;
110+
}
111+
112+
const userId = pathParts[1];
113+
const documentType = pathParts[2]; // "syllabus" or "transcript"
114+
115+
try {
116+
console.log(`Processing uploaded PDF: ${filePath}`);
117+
118+
// Store basic information in Firestore
119+
const db = admin.firestore();
120+
const docRef = db.collection('users').doc(userId).collection('documents').doc(documentType);
121+
122+
await docRef.set({
123+
filePath: filePath,
124+
uploadedAt: admin.firestore.FieldValue.serverTimestamp(),
125+
status: 'uploaded'
126+
});
127+
128+
console.log(`PDF upload metadata saved to Firestore for ${filePath}`);
129+
return null;
130+
} catch (error) {
131+
console.error(`Error processing uploaded PDF: ${error}`);
132+
return null;
133+
}
134+
});

0 commit comments

Comments
 (0)