Skip to content

Commit 472fcd7

Browse files
committed
Enhance document processing flow by adding checks for syllabus completion and improving logging; normalize document types for consistency in formatting and status updates.
1 parent 96068b8 commit 472fcd7

File tree

3 files changed

+123
-77
lines changed

3 files changed

+123
-77
lines changed

frontend/src/components/DocumentProcessingStatus.tsx

Lines changed: 73 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ const DocumentProcessingStatus: React.FC<DocumentProcessingStatusProps> = ({ onP
2121
const { currentUser } = useAuth();
2222
const [documents, setDocuments] = useState<Document[]>([]);
2323
const [isFormatting, setIsFormatting] = useState<boolean>(false);
24+
const [processingComplete, setProcessingComplete] = useState<boolean>(false);
2425
const [error, setError] = useState<string | null>(null);
2526
const [status, setStatus] = useState<string>('');
2627

@@ -35,44 +36,88 @@ const DocumentProcessingStatus: React.FC<DocumentProcessingStatusProps> = ({ onP
3536
const documentsRef = collection(userDocRef, 'documents');
3637
const q = query(documentsRef);
3738

39+
console.log('Setting up document status listener');
40+
3841
const unsubscribe = onSnapshot(q, (snapshot) => {
3942
const docs: Document[] = [];
43+
44+
console.log(`Received ${snapshot.docs.length} documents from Firestore`);
45+
4046
snapshot.forEach(doc => {
47+
const data = doc.data();
48+
console.log(`Document ${doc.id}, Type: ${data.documentType}, Status: ${data.status}`);
49+
4150
docs.push({
4251
id: doc.id,
43-
...doc.data() as Omit<Document, 'id'>
52+
...data as Omit<Document, 'id'>
4453
});
4554
});
4655

56+
// Update the documents state
4757
setDocuments(docs);
4858

49-
// Check if all documents are processed
50-
const allProcessed = docs.length > 0 && docs.every(doc => doc.status === 'processed');
51-
if (allProcessed && onProcessingComplete) {
52-
onProcessingComplete();
59+
if (docs.length > 0) {
60+
// Force case-insensitive comparison for document types
61+
const documentTypeCount = {
62+
syllabus: docs.filter(doc => doc.documentType?.toLowerCase() === DOCUMENT_TYPES.SYLLABUS).length,
63+
transcript: docs.filter(doc => doc.documentType?.toLowerCase() === DOCUMENT_TYPES.TRANSCRIPT).length,
64+
grades: docs.filter(doc => doc.documentType?.toLowerCase() === DOCUMENT_TYPES.GRADES).length
65+
};
66+
67+
// Force case-insensitive comparison for document status
68+
const statusCount = {
69+
uploaded: docs.filter(d => d.status?.toLowerCase() === 'uploaded').length,
70+
extracted: docs.filter(d => d.status?.toLowerCase() === 'extracted').length,
71+
processed: docs.filter(d => d.status?.toLowerCase() === 'processed').length,
72+
error: docs.filter(d => d.status?.toLowerCase() === 'error').length
73+
};
74+
75+
console.log('Document counts by type:', documentTypeCount);
76+
console.log('Document counts by status:', statusCount);
77+
78+
// Check for processing completion based on processed syllabus
79+
const hasSyllabus = docs.some(doc =>
80+
doc.documentType?.toLowerCase() === DOCUMENT_TYPES.SYLLABUS &&
81+
doc.status?.toLowerCase() === 'processed'
82+
);
83+
84+
console.log('Has processed syllabus:', hasSyllabus);
85+
86+
// Trigger completion if we have a processed syllabus
87+
if (hasSyllabus && onProcessingComplete && !processingComplete) {
88+
console.log('Processing complete condition met - has processed syllabus');
89+
onProcessingComplete();
90+
setProcessingComplete(true);
91+
}
5392
}
93+
}, (error) => {
94+
console.error('Error in document snapshot listener:', error);
95+
setError('Error monitoring document status');
5496
});
5597

5698
return () => unsubscribe();
5799
}, [currentUser, db, onProcessingComplete]);
58100

59-
// Count documents by status
101+
// Calculate document counts with case-insensitive comparison
60102
const documentCounts = {
61-
uploaded: documents.filter(doc => doc.status === 'uploaded').length,
62-
extracted: documents.filter(doc => doc.status === 'extracted').length,
63-
processed: documents.filter(doc => doc.status === 'processed').length,
64-
error: documents.filter(doc => doc.status === 'error').length
103+
uploaded: documents.filter(doc => doc.status?.toLowerCase() === 'uploaded').length,
104+
extracted: documents.filter(doc => doc.status?.toLowerCase() === 'extracted').length,
105+
processed: documents.filter(doc => doc.status?.toLowerCase() === 'processed').length,
106+
error: documents.filter(doc => doc.status?.toLowerCase() === 'error').length
65107
};
66108

67-
// Count documents by type (case-insensitive)
109+
// Count documents by type with case-insensitive comparison
68110
const documentTypeCount = {
69-
syllabus: documents.filter(doc => doc.documentType.toLowerCase() === DOCUMENT_TYPES.SYLLABUS).length,
70-
transcript: documents.filter(doc => doc.documentType.toLowerCase() === DOCUMENT_TYPES.TRANSCRIPT).length,
71-
grades: documents.filter(doc => doc.documentType.toLowerCase() === DOCUMENT_TYPES.GRADES).length
111+
syllabus: documents.filter(doc => doc.documentType?.toLowerCase() === DOCUMENT_TYPES.SYLLABUS).length,
112+
transcript: documents.filter(doc => doc.documentType?.toLowerCase() === DOCUMENT_TYPES.TRANSCRIPT).length,
113+
grades: documents.filter(doc => doc.documentType?.toLowerCase() === DOCUMENT_TYPES.GRADES).length
72114
};
73115

74-
// Check if we have the minimum required documents (case-insensitive)
75-
const hasSyllabus = documents.some(doc => doc.documentType.toLowerCase() === DOCUMENT_TYPES.SYLLABUS);
116+
// Check if we have the minimum required documents with case-insensitive comparison
117+
const hasSyllabus = documents.some(doc =>
118+
doc.documentType?.toLowerCase() === DOCUMENT_TYPES.SYLLABUS &&
119+
doc.status?.toLowerCase() === 'processed'
120+
);
76121
const hasMinimumDocuments = hasSyllabus;
77122

78123
// Handle manual formatting
@@ -123,10 +168,12 @@ const DocumentProcessingStatus: React.FC<DocumentProcessingStatusProps> = ({ onP
123168
}
124169
};
125170

126-
// Calculate overall progress
171+
// Calculate overall progress with case-insensitive comparison and logging
127172
const calculateProgress = () => {
128173
if (documents.length === 0) return 0;
129174

175+
console.log('Calculating progress with', documents.length, 'documents');
176+
130177
const totalSteps = documents.length * 2; // Upload + Process for each document
131178
let completedSteps = 0;
132179

@@ -135,12 +182,14 @@ const DocumentProcessingStatus: React.FC<DocumentProcessingStatusProps> = ({ onP
135182
completedSteps += 1;
136183

137184
// Count processing step for extracted or processed documents
138-
if (doc.status === 'extracted' || doc.status === 'processed') {
185+
if (doc.status?.toLowerCase() === 'extracted' || doc.status?.toLowerCase() === 'processed') {
139186
completedSteps += 1;
140187
}
141188
});
142189

143-
return Math.round((completedSteps / totalSteps) * 100);
190+
const progress = Math.round((completedSteps / totalSteps) * 100);
191+
console.log(`Progress calculation: ${completedSteps}/${totalSteps} = ${progress}%`);
192+
return progress;
144193
};
145194

146195
const progress = calculateProgress();
@@ -245,23 +294,23 @@ const DocumentProcessingStatus: React.FC<DocumentProcessingStatusProps> = ({ onP
245294
<td style={styles.tableCell}>
246295
<span style={{
247296
...styles.statusBadge,
248-
backgroundColor: doc.status === 'processed' ? '#4caf50' :
249-
doc.status === 'extracted' ? '#ff9800' :
250-
doc.status === 'error' ? '#f44336' : '#2196f3'
297+
backgroundColor: doc.status?.toLowerCase() === 'processed' ? '#4caf50' :
298+
doc.status?.toLowerCase() === 'extracted' ? '#ff9800' :
299+
doc.status?.toLowerCase() === 'error' ? '#f44336' : '#2196f3'
251300
}}>
252301
{doc.status}
253302
</span>
254303
</td>
255304
<td style={styles.tableCell}>
256-
{doc.status === 'uploaded' && (
305+
{doc.status?.toLowerCase() === 'uploaded' && (
257306
<button
258307
onClick={() => handleRetryProcessing(doc.id)}
259308
style={styles.actionButton}
260309
>
261310
Process Document
262311
</button>
263312
)}
264-
{doc.status === 'error' && (
313+
{doc.status?.toLowerCase() === 'error' && (
265314
<button
266315
onClick={() => handleRetryProcessing(doc.id)}
267316
style={styles.actionButton}

functions-node/formatDocumentsData.js

Lines changed: 32 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,6 @@ const functions = require('firebase-functions');
33
const admin = require('firebase-admin');
44
const { DOCUMENT_TYPES, normalizeDocumentType } = require('./constants/documentTypes');
55

6-
/**
7-
* Formats all document data using a single OpenAI API call to ensure consistent structure
8-
* @param {string} userId - The user ID
9-
* @returns {Promise<Object>} Formatted data for calculations and predictions
10-
*/
116
/**
127
* Formats all document data using a single OpenAI API call to ensure consistent structure
138
* @param {string} userId - The user ID
@@ -17,7 +12,7 @@ exports.formatDocumentsData = async (userId) => {
1712
console.log(`Formatting all document data for user ${userId} using OpenAI`);
1813

1914
try {
20-
// Get all processed documents with extracted text
15+
// Get all documents with extracted text
2116
const db = admin.firestore();
2217
const documentsRef = db.collection('users').doc(userId).collection('documents');
2318
const snapshot = await documentsRef.where('status', '==', 'extracted').get();
@@ -27,12 +22,16 @@ exports.formatDocumentsData = async (userId) => {
2722
return null;
2823
}
2924

25+
console.log(`Found ${snapshot.size} documents with status 'extracted'`);
26+
3027
// Organize documents by type
3128
const documentsByType = {};
3229
snapshot.forEach(doc => {
3330
const data = doc.data();
3431
if (data.documentType && data.text) {
35-
documentsByType[data.documentType] = {
32+
// Use normalized document type
33+
const normalizedType = normalizeDocumentType(data.documentType);
34+
documentsByType[normalizedType] = {
3635
id: doc.id,
3736
text: data.text,
3837
...data
@@ -92,7 +91,8 @@ exports.formatDocumentsData = async (userId) => {
9291
await storeFormattedData(userId, formattedData);
9392

9493
// Update the status of all processed documents
95-
await updateDocumentStatus(userId, snapshot.docs);
94+
const updateResult = await updateDocumentStatus(userId, snapshot.docs);
95+
console.log(`Document status update result: ${updateResult}`);
9696

9797
return formattedData;
9898
} catch (error) {
@@ -103,21 +103,16 @@ exports.formatDocumentsData = async (userId) => {
103103
}
104104
};
105105

106-
/**
107-
* Creates the prompt for OpenAI formatting
108-
* @param {Object} documentsByType - Documents organized by type
109-
* @returns {string} Formatted prompt
110-
*/
111106
/**
112107
* Creates the prompt for OpenAI formatting
113108
* @param {Object} documentsByType - Documents organized by type
114109
* @returns {string} Formatted prompt
115110
*/
116111
function createFormattingPrompt(documentsByType) {
117112
// Extract the document texts
118-
const syllabusText = documentsByType.syllabus ? documentsByType.syllabus.text : '';
119-
const gradesText = documentsByType.grades ? documentsByType.grades.text : '';
120-
const transcriptText = documentsByType.transcript ? documentsByType.transcript.text : '';
113+
const syllabusText = documentsByType[DOCUMENT_TYPES.SYLLABUS]?.text || '';
114+
const gradesText = documentsByType[DOCUMENT_TYPES.GRADES]?.text || '';
115+
const transcriptText = documentsByType[DOCUMENT_TYPES.TRANSCRIPT]?.text || '';
121116

122117
return `
123118
I need you to format educational document data into a consistent structure for grade calculations and predictions.
@@ -190,12 +185,6 @@ For the academicHistory.relevantCourses, analyze the transcript to find courses
190185
`;
191186
}
192187

193-
/**
194-
* Stores the formatted data in Firestore
195-
* @param {string} userId - The user ID
196-
* @param {Object} formattedData - The formatted data
197-
* @returns {Promise<void>}
198-
*/
199188
/**
200189
* Stores the formatted data in Firestore
201190
* @param {string} userId - The user ID
@@ -223,27 +212,27 @@ async function storeFormattedData(userId, formattedData) {
223212
* Updates the status of processed documents
224213
* @param {string} userId - The user ID
225214
* @param {Array} documents - The document snapshots
226-
* @returns {Promise<void>}
227-
*/
228-
/**
229-
* Updates the status of processed documents
230-
* @param {string} userId - The user ID
231-
* @param {Array} documents - The document snapshots
232-
* @returns {Promise<void>}
215+
* @returns {Promise<boolean>} True if any documents were updated
233216
*/
234217
async function updateDocumentStatus(userId, documents) {
235-
console.log(`Updating status for ${documents.length} documents`);
218+
console.log(`Attempting to update status for ${documents.length} documents`);
236219
const db = admin.firestore();
237220
const batch = db.batch();
238221

239222
let updateCount = 0;
240223

241224
documents.forEach(doc => {
225+
// Add extra logging to debug
226+
console.log(`Processing document for status update: ${doc.id}`);
227+
242228
const docRef = db.collection('users').doc(userId).collection('documents').doc(doc.id);
243-
const docData = doc.data();
244229

245-
// Only update documents that are in 'extracted' status
246-
if (docData.status === 'extracted') {
230+
// Handle different document object formats
231+
const docData = doc.data ? doc.data() : doc;
232+
console.log(`Document status before update: ${docData.status}`);
233+
234+
// Only update documents that are in 'extracted' status (case-insensitive)
235+
if (docData.status?.toLowerCase() === 'extracted') {
247236
batch.update(docRef, {
248237
status: 'processed',
249238
processedAt: admin.firestore.FieldValue.serverTimestamp()
@@ -256,20 +245,20 @@ async function updateDocumentStatus(userId, documents) {
256245
});
257246

258247
if (updateCount > 0) {
259-
await batch.commit();
260-
console.log(`Successfully updated ${updateCount} document statuses to processed`);
261-
return true;
248+
try {
249+
await batch.commit();
250+
console.log(`Successfully updated ${updateCount} document statuses to processed`);
251+
return true;
252+
} catch (error) {
253+
console.error(`Error committing batch update: ${error}`);
254+
throw error;
255+
}
262256
} else {
263257
console.log('No documents to update');
264258
return false;
265259
}
266260
}
267261

268-
/**
269-
* Creates a fallback formatted data structure if OpenAI fails
270-
* @param {Object} documentsByType - Documents organized by type
271-
* @returns {Object} Fallback formatted data
272-
*/
273262
/**
274263
* Creates a fallback formatted data structure if OpenAI fails
275264
* @param {Object} documentsByType - Documents organized by type
@@ -279,7 +268,7 @@ function createFallbackFormattedData(documentsByType) {
279268
console.log('Creating fallback formatted data');
280269

281270
// Extract basic information using regex patterns
282-
const syllabusText = documentsByType.syllabus ? documentsByType.syllabus.text : '';
271+
const syllabusText = documentsByType[DOCUMENT_TYPES.SYLLABUS]?.text || '';
283272

284273
// Extract course name
285274
const courseNameMatch = syllabusText.match(/course(?:\s+title)?:?\s*([^\n]+)/i);
@@ -297,7 +286,7 @@ function createFallbackFormattedData(documentsByType) {
297286
const gradeWeights = extractGradeWeights(syllabusText);
298287

299288
// Extract GPA from transcript
300-
const transcriptText = documentsByType.transcript ? documentsByType.transcript.text : '';
289+
const transcriptText = documentsByType[DOCUMENT_TYPES.TRANSCRIPT]?.text || '';
301290
const gpaMatch = transcriptText.match(/gpa:?\s*([\d\.]+)/i);
302291
const gpa = gpaMatch ? gpaMatch[1].trim() : "3.0";
303292

@@ -321,11 +310,6 @@ function createFallbackFormattedData(documentsByType) {
321310
};
322311
}
323312

324-
/**
325-
* Extract grade weights using regex patterns
326-
* @param {string} text - Text to extract grade weights from
327-
* @returns {Array} Array of {name, weight} objects
328-
*/
329313
/**
330314
* Extract grade weights using regex patterns
331315
* @param {string} text - Text to extract grade weights from
@@ -372,9 +356,6 @@ function extractGradeWeights(text) {
372356
}
373357
}
374358

375-
/**
376-
* Helper function to get OpenAI API key
377-
*/
378359
/**
379360
* Helper function to get OpenAI API key
380361
*/

0 commit comments

Comments
 (0)