Skip to content

Commit 3863b7e

Browse files
committed
Add document-data inspection and formatted-data console logging to DocumentProcessingStatus; expand debug logging in the formatDocumentsData, updateDocumentStatus, and processExtractedText functions
1 parent 46b25ac commit 3863b7e

File tree

3 files changed

+131
-39
lines changed

3 files changed

+131
-39
lines changed

frontend/src/components/DocumentProcessingStatus.tsx

Lines changed: 100 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import React, { useState, useEffect } from 'react';
22
import { useAuth } from '../contexts/AuthContext';
3-
import { getFirestore, collection, query, onSnapshot, doc } from 'firebase/firestore';
3+
import { getFirestore, collection, query, onSnapshot, doc, getDoc } from 'firebase/firestore';
44
import { getFunctions, httpsCallable } from 'firebase/functions';
55
import DOCUMENT_TYPES, { DocumentType } from '../constants/documentTypes';
66

@@ -192,10 +192,71 @@ const DocumentProcessingStatus: React.FC<DocumentProcessingStatusProps> = ({ onP
192192
return progress;
193193
};
194194

195+
/**
 * Debug helper: reads one of the current user's documents from Firestore
 * (`users/{uid}/documents/{documentId}`) and logs its type, status, name and
 * a preview of its text to the console.
 *
 * Progress and success are reported through `setStatus`; a missing document
 * or a failed read is reported through `setError`. Does nothing when there is
 * no `currentUser`.
 *
 * @param documentId - ID of the document inside the user's `documents` collection.
 */
const inspectDocumentData = async (documentId: string): Promise<void> => {
  if (!currentUser) return;

  setStatus(`Inspecting document ${documentId}...`);

  try {
    // Get document from Firestore
    const db = getFirestore();
    const docRef = doc(db, 'users', currentUser.uid, 'documents', documentId);
    const docSnap = await getDoc(docRef);

    if (!docSnap.exists()) {
      setError(`Document not found: ${documentId}`);
      return;
    }

    const data = docSnap.data();
    console.log("===== DOCUMENT DATA =====");
    console.log(`Document ID: ${documentId}`);
    console.log(`Type: ${data.documentType}`);
    console.log(`Status: ${data.status}`);
    console.log(`Name: ${data.name}`);

    // Log text data if available (limited to first 500 chars for readability).
    // Firestore fields are untyped, so verify `text` is a string before
    // calling string methods on it.
    if (typeof data.text === 'string' && data.text.length > 0) {
      const previewLength = 500;
      const isTruncated = data.text.length > previewLength;
      console.log(`Text length: ${data.text.length} characters`);
      console.log("First 500 characters:");
      // Only add the ellipsis when something was actually cut off.
      console.log(data.text.substring(0, previewLength) + (isTruncated ? "..." : ""));
    }

    setStatus(`Document data logged to console for ${documentId}`);
  } catch (err: unknown) {
    console.error('Error inspecting document:', err);
    // Narrow the caught value instead of assuming it has a `message`.
    const message = err instanceof Error && err.message ? err.message : 'Unknown error';
    setError(`Inspection failed: ${message}`);
  }
};
231+
232+
/**
 * Debug helper: reads the current user's formatted-data document from
 * Firestore (`users/{uid}/data/formatted_data`) and pretty-prints its
 * `formatted_data` field to the console as JSON.
 *
 * Progress and success are reported through `setStatus`; a missing document
 * or a failed read is reported through `setError`. Does nothing when there is
 * no `currentUser`.
 */
const inspectFormattedData = async (): Promise<void> => {
  if (!currentUser) return;

  setStatus('Fetching formatted data...');

  try {
    const db = getFirestore();
    const formattedDataRef = doc(db, 'users', currentUser.uid, 'data', 'formatted_data');
    const docSnap = await getDoc(formattedDataRef);

    if (!docSnap.exists()) {
      console.log("No formatted data document found");
      setError('No formatted data available');
      return;
    }

    const data = docSnap.data();
    console.log("===== FORMATTED DATA =====");
    console.log(JSON.stringify(data.formatted_data, null, 2));
    setStatus('Formatted data logged to console');
  } catch (err: unknown) {
    console.error('Error fetching formatted data:', err);
    // Narrow the caught value instead of assuming it has a `message`.
    const message = err instanceof Error && err.message ? err.message : 'Unknown error';
    setError(`Failed to fetch formatted data: ${message}`);
  }
};
257+
195258
const progress = calculateProgress();
196259
const canFormat = documentCounts.extracted > 0 && !isFormatting;
197-
// We'll keep this comment to document what we're checking, but remove the unused variable
198-
// const hasErrors = documentCounts.error > 0;
199260

200261
return (
201262
<div style={styles.container}>
@@ -265,13 +326,21 @@ const DocumentProcessingStatus: React.FC<DocumentProcessingStatusProps> = ({ onP
265326
</div>
266327

267328
{canFormat && (
268-
<button
269-
onClick={handleFormatDocuments}
270-
style={styles.formatButton}
271-
disabled={isFormatting}
272-
>
273-
{isFormatting ? 'Formatting...' : 'Format Documents'}
274-
</button>
329+
<div>
330+
<button
331+
onClick={handleFormatDocuments}
332+
style={styles.formatButton}
333+
disabled={isFormatting}
334+
>
335+
{isFormatting ? 'Formatting...' : 'Format Documents'}
336+
</button>
337+
<button
338+
onClick={inspectFormattedData}
339+
style={{...styles.formatButton, backgroundColor: '#666', marginLeft: '10px'}}
340+
>
341+
Inspect Formatted Data
342+
</button>
343+
</div>
275344
)}
276345

277346
{documents.length > 0 && (
@@ -302,22 +371,30 @@ const DocumentProcessingStatus: React.FC<DocumentProcessingStatusProps> = ({ onP
302371
</span>
303372
</td>
304373
<td style={styles.tableCell}>
305-
{doc.status?.toLowerCase() === 'uploaded' && (
374+
<div style={{ display: 'flex', gap: '10px' }}>
375+
{doc.status?.toLowerCase() === 'uploaded' && (
376+
<button
377+
onClick={() => handleRetryProcessing(doc.id)}
378+
style={styles.actionButton}
379+
>
380+
Process Document
381+
</button>
382+
)}
383+
{doc.status?.toLowerCase() === 'error' && (
384+
<button
385+
onClick={() => handleRetryProcessing(doc.id)}
386+
style={styles.actionButton}
387+
>
388+
Retry Processing
389+
</button>
390+
)}
306391
<button
307-
onClick={() => handleRetryProcessing(doc.id)}
308-
style={styles.actionButton}
392+
onClick={() => inspectDocumentData(doc.id)}
393+
style={{...styles.actionButton, backgroundColor: '#666'}}
309394
>
310-
Process Document
395+
Inspect
311396
</button>
312-
)}
313-
{doc.status?.toLowerCase() === 'error' && (
314-
<button
315-
onClick={() => handleRetryProcessing(doc.id)}
316-
style={styles.actionButton}
317-
>
318-
Retry Processing
319-
</button>
320-
)}
397+
</div>
321398
{doc.error && (
322399
<div style={styles.errorMessage}>
323400
Error: {doc.error}

functions-node/formatDocumentsData.js

Lines changed: 23 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ const { DOCUMENT_TYPES, normalizeDocumentType } = require('./constants/documentT
99
* @returns {Promise<Object>} Formatted data for calculations and predictions
1010
*/
1111
exports.formatDocumentsData = async (userId) => {
12-
console.log(`Formatting all document data for user ${userId} using OpenAI`);
12+
console.log(`====== FORMAT DOCUMENTS DATA CALLED - USER ID: ${userId} ======`);
1313

1414
try {
1515
// Get all documents with extracted text
@@ -22,7 +22,11 @@ exports.formatDocumentsData = async (userId) => {
2222
return null;
2323
}
2424

25-
console.log(`Found ${snapshot.size} documents with status 'extracted'`);
25+
console.log(`Found ${snapshot.size} documents with status 'extracted':`);
26+
snapshot.forEach(doc => {
27+
const data = doc.data();
28+
console.log(`- Doc ID: ${doc.id}, Type: ${data.documentType}, Status: ${data.status}, Text length: ${data.text?.length || 0}`);
29+
});
2630

2731
// Organize documents by type
2832
const documentsByType = {};
@@ -69,6 +73,8 @@ exports.formatDocumentsData = async (userId) => {
6973
const prompt = createFormattingPrompt(documentsByType);
7074

7175
// Call OpenAI API
76+
console.log("===== OPENAI PROMPT =====");
77+
console.log(prompt);
7278
console.log('Calling OpenAI for unified data formatting');
7379
const response = await openai.chat.completions.create({
7480
model: "gpt-4o-mini",
@@ -84,8 +90,12 @@ exports.formatDocumentsData = async (userId) => {
8490
});
8591

8692
// Extract and parse the JSON response
93+
console.log("===== OPENAI RESPONSE =====");
94+
console.log(response.choices[0].message.content);
95+
8796
const formattedData = JSON.parse(response.choices[0].message.content);
88-
console.log('Successfully formatted data with OpenAI');
97+
console.log("===== PARSED FORMATTED DATA =====");
98+
console.log(JSON.stringify(formattedData, null, 2));
8999

90100
// Store the formatted data in the user's data document
91101
await storeFormattedData(userId, formattedData);
@@ -215,42 +225,40 @@ async function storeFormattedData(userId, formattedData) {
215225
* @returns {Promise<boolean>} True if any documents were updated
216226
*/
217227
async function updateDocumentStatus(userId, documents) {
218-
console.log(`Attempting to update status for ${documents.length} documents`);
228+
console.log(`===== UPDATE DOCUMENT STATUS - USER ${userId} =====`);
229+
console.log(`Documents to process: ${documents.length}`);
230+
219231
const db = admin.firestore();
220232
const batch = db.batch();
221233

222234
let updateCount = 0;
223235

224236
documents.forEach(doc => {
225-
// Add extra logging to debug
226-
console.log(`Processing document for status update: ${doc.id}`);
237+
const docData = doc.data ? doc.data() : doc;
238+
console.log(`Processing doc ${doc.id}: Type: ${docData.documentType}, Status: ${docData.status}`);
227239

228240
const docRef = db.collection('users').doc(userId).collection('documents').doc(doc.id);
229241

230-
// Handle different document object formats
231-
const docData = doc.data ? doc.data() : doc;
232-
console.log(`Document status before update: ${docData.status}`);
233-
234-
// Only update documents that are in 'extracted' status (case-insensitive)
235242
if (docData.status?.toLowerCase() === 'extracted') {
236243
batch.update(docRef, {
237244
status: 'processed',
238245
processedAt: admin.firestore.FieldValue.serverTimestamp()
239246
});
240247
updateCount++;
241-
console.log(`Marking document ${doc.id} as processed`);
248+
console.log(`Marking document ${doc.id} as processed`);
242249
} else {
243-
console.log(`Skipping document ${doc.id} with status ${docData.status}`);
250+
console.log(`Skipping document ${doc.id} with status ${docData.status}`);
244251
}
245252
});
246253

247254
if (updateCount > 0) {
248255
try {
249256
await batch.commit();
250-
console.log(`Successfully updated ${updateCount} document statuses to processed`);
257+
console.log(`Successfully committed batch update for ${updateCount} documents`);
251258
return true;
252259
} catch (error) {
253-
console.error(`Error committing batch update: ${error}`);
260+
console.error(`✗ Error committing batch update: ${error}`);
261+
console.error(error.stack);
254262
throw error;
255263
}
256264
} else {

functions-node/index.js

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -475,7 +475,8 @@ async function extractTextFromPdf(userId, documentId, filePath) {
475475
* @throws {Error} If processing fails
476476
*/
477477
async function processExtractedText(docs) {
478-
console.log('Processing extracted text into structured data');
478+
console.log("===== PROCESS EXTRACTED TEXT CALLED =====");
479+
console.log(`Documents available: ${Object.keys(docs).join(', ')}`);
479480
const structuredData = {};
480481

481482
// Get API key
@@ -494,25 +495,31 @@ async function processExtractedText(docs) {
494495

495496
// Process syllabus
496497
if (docs.syllabus) {
498+
console.log(`Processing syllabus with ${docs.syllabus.length} characters`);
497499
structuredData.syllabus = await processSyllabusText(docs.syllabus, openai);
500+
console.log("Syllabus processed successfully");
498501
}
499502

500503
// Process transcript
501504
if (docs.transcript) {
505+
console.log(`Processing transcript with ${docs.transcript.length} characters`);
502506
structuredData.transcript = await processTranscriptText(
503507
docs.transcript,
504508
openai,
505509
structuredData.syllabus
506510
);
511+
console.log("Transcript processed successfully");
507512
}
508513

509514
// Process grades
510515
if (docs.grades) {
516+
console.log(`Processing grades with ${docs.grades.length} characters`);
511517
structuredData.grades = await processGradesText(
512518
docs.grades,
513519
openai,
514520
structuredData.syllabus
515521
);
522+
console.log("Grades processed successfully:", JSON.stringify(structuredData.grades, null, 2));
516523
}
517524

518525
console.log('Finished processing extracted text');

0 commit comments

Comments
 (0)