MSU-AI
diff --git a/functions/README.md b/functions/README.md
Lines changed: 99 additions & 0 deletions
diff --git a/functions/combined_predict.py b/functions/combined_predict.py
Lines changed: 157 additions & 0 deletions
@@ -0,0 +1,99 @@
+# Firebase Cloud Functions for Grade Prediction
+
+This directory contains the Firebase Cloud Functions for the grade prediction application. These functions handle PDF processing, OCR, grade prediction, and data storage.
+
+## Overview
+
+The application uses Firebase Cloud Functions to:
+
+1. Upload and process PDF documents (syllabus and transcript)
+2. Extract text from PDFs using OCR
+3. Predict final grades using both ChatGPT and Linear Regression models
+4. Store predictions and analyses in Firestore
+
+## Function Categories
+
+### Authentication Functions
+
+- `create_user_profile`: Create a user profile after registration
+- `delete_user_data`: Delete a user's data when their account is deleted
+- `get_user_profile`: Get a user's profile data
+- `update_user_profile`: Update a user's profile data
+
+### OCR Functions
+
+- `extract_text_from_pdf`: Extract text from a PDF file
+- `process_syllabus`: Process syllabus PDF to extract course information
+- `process_transcript`: Process transcript PDF to extract grade information
+- `process_uploaded_pdf`: Triggered when a PDF is uploaded to Firebase Storage
+
+### OpenAI API Functions
+
+- `analyze_grades`: Analyze extracted grade data using OpenAI API
+- `predict_final_grade`: Predict final grade based on current grades and weights
+- `extract_assignments`: Extract upcoming assignments and exams from syllabus
+
+### Storage Functions
+
+- `get_upload_url`: Generate a signed URL for uploading a PDF to Firebase Storage
+
+### ML Prediction Functions
+
+- `predict_with_linear_regression`: Predict final grade using Linear Regression model
+- `add_training_data`: Add new training data for the linear regression model
+
+### Combined Prediction Functions
+
+- `get_combined_prediction`: Get combined prediction from both ChatGPT and Linear Regression models
+- `get_latest_predictions`: Get the latest predictions for a user
+
+### Document Processing Functions
+
+- `upload_and_process_document`: Upload and process a document (syllabus or transcript)
+- `get_document_status`: Get the status of a document upload and processing
+- `get_user_documents`: Get all documents uploaded by a user
+
+## Workflow
+
+1. The user uploads a syllabus PDF and a transcript PDF
+2. PDFs are stored in Firebase Storage
+3. OCR extracts text from PDFs
+4. Extracted text is processed to get structured data
+5. Structured data is used to predict final grades
+6. Predictions are stored in Firestore
+
+## Database Structure
+
+### Firestore Collections
+
+- `/users/{userId}`: User profile data
+- `/users/{userId}/documents/{documentType}`: Extracted text from PDFs
+- `/users/{userId}/syllabi/{syllabusId}`: Structured syllabus data
+- `/users/{userId}/transcripts/{transcriptId}`: Structured transcript data
+- `/users/{userId}/predictions/{predictionId}`: ChatGPT predictions
+- `/users/{userId}/ml_predictions/{predictionId}`: Linear Regression predictions
+- `/users/{userId}/combined_predictions/{predictionId}`: Combined predictions
+- `/users/{userId}/analyses/{analysisId}`: Grade analyses
+- `/users/{userId}/assignments/{assignmentId}`: Extracted assignments
+- `/users/{userId}/document_uploads/{documentId}`: Document upload metadata
+- `/training_data/students`: Training data for Linear Regression model
+
+### Firebase Storage
+
+- `/users/{userId}/syllabus/{filename}.pdf`: Syllabus PDFs
+- `/users/{userId}/transcript/{filename}.pdf`: Transcript PDFs
+
+## Environment Variables
+
+- `OPENAI_API_KEY`: OpenAI API key for ChatGPT predictions
+
+## Dependencies
+
+- `firebase-functions` (imported as `firebase_functions`): Firebase Cloud Functions SDK
+- `firebase-admin` (imported as `firebase_admin`): Firebase Admin SDK
+- `google-cloud-firestore`: Google Cloud Firestore SDK
+- `openai`: OpenAI API SDK
+- `numpy`: Numerical computing library
+- `scikit-learn`: Machine learning library
+- `PyMuPDF`: PDF processing library
+- `python-dotenv`: Environment variable management
@@ -0,0 +1,157 @@
+from firebase_functions import https_fn
+from firebase_admin import firestore
+import google.cloud.firestore
+import json
+import os
+
+@https_fn.on_call()
+def get_combined_prediction(req: https_fn.CallableRequest) -> dict:
+    """
+    Get combined prediction from both ChatGPT and Linear Regression models.
+    """
+    if not req.auth:
+        raise https_fn.HttpsError(
+            code=https_fn.FunctionsErrorCode.UNAUTHENTICATED,
+            message="User must be authenticated"
+        )
+    
+    user_id = req.auth.uid
+    
+    # Get prediction IDs from request
+    chatgpt_prediction_id = req.data.get("chatgptPredictionId")
+    ml_prediction_id = req.data.get("mlPredictionId")
+    
+    if not chatgpt_prediction_id or not ml_prediction_id:
+        raise https_fn.HttpsError(
+            code=https_fn.FunctionsErrorCode.INVALID_ARGUMENT,
+            message="Both ChatGPT and ML prediction IDs are required"
+        )
+    
+    # Get predictions from Firestore
+    try:
+        db = firestore.client()
+        
+        # Get ChatGPT prediction
+        chatgpt_prediction_ref = db.collection("users").document(user_id).collection("predictions").document(chatgpt_prediction_id)
+        chatgpt_prediction_doc = chatgpt_prediction_ref.get()
+        
+        if not chatgpt_prediction_doc.exists:
+            raise https_fn.HttpsError(
+                code=https_fn.FunctionsErrorCode.NOT_FOUND,
+                message="ChatGPT prediction not found"
+            )
+        
+        chatgpt_prediction = chatgpt_prediction_doc.to_dict()
+        
+        # Get ML prediction
+        ml_prediction_ref = db.collection("users").document(user_id).collection("ml_predictions").document(ml_prediction_id)
+        ml_prediction_doc = ml_prediction_ref.get()
+        
+        if not ml_prediction_doc.exists:
+            raise https_fn.HttpsError(
+                code=https_fn.FunctionsErrorCode.NOT_FOUND,
+                message="ML prediction not found"
+            )
+        
+        ml_prediction = ml_prediction_doc.to_dict()
+        
+        # Combine predictions
+        chatgpt_grade = chatgpt_prediction["prediction"]["grade"]
+        ml_grade = ml_prediction["prediction"]["grade"]
+        
+        # Simple average of both predictions
+        combined_grade = (float(chatgpt_grade) + float(ml_grade)) / 2
+        
+        # Create combined prediction
+        combined_prediction = {
+            "grade": combined_grade,
+            "chatgpt_grade": chatgpt_grade,
+            "ml_grade": ml_grade,
+            "reasoning": chatgpt_prediction["prediction"]["reasoning"],
+            "confidence": "medium"  # Default confidence
+        }
+        
+        # Determine confidence based on agreement between models
+        grade_difference = abs(float(chatgpt_grade) - float(ml_grade))
+        if grade_difference < 5:
+            combined_prediction["confidence"] = "high"
+        elif grade_difference > 15:
+            combined_prediction["confidence"] = "low"
+        
+        # Store combined prediction in Firestore
+        combined_ref = db.collection("users").document(user_id).collection("combined_predictions").document()
+        
+        combined_data = {
+            "chatgptPrediction": chatgpt_prediction,
+            "mlPrediction": ml_prediction,
+            "combinedPrediction": combined_prediction,
+            "createdAt": firestore.SERVER_TIMESTAMP
+        }
+        
+        combined_ref.set(combined_data)
+        
+        return {
+            "success": True,
+            "predictionId": combined_ref.id,
+            "prediction": combined_prediction
+        }
+    
+    except Exception as e:
+        raise https_fn.HttpsError(
+            code=https_fn.FunctionsErrorCode.INTERNAL,
+            message=f"Error getting combined prediction: {str(e)}"
+        )
+
+@https_fn.on_call()
+def get_latest_predictions(req: https_fn.CallableRequest) -> dict:
+    """
+    Get the latest predictions for a user.
+    """
+    if not req.auth:
+        raise https_fn.HttpsError(
+            code=https_fn.FunctionsErrorCode.UNAUTHENTICATED,
+            message="User must be authenticated"
+        )
+    
+    user_id = req.auth.uid
+    
+    # Get predictions from Firestore
+    try:
+        db = firestore.client()
+        
+        # Get latest ChatGPT prediction
+        chatgpt_predictions = db.collection("users").document(user_id).collection("predictions").order_by("createdAt", direction=firestore.Query.DESCENDING).limit(1).stream()
+        chatgpt_prediction = None
+        for doc in chatgpt_predictions:
+            chatgpt_prediction = doc.to_dict()
+            chatgpt_prediction["id"] = doc.id
+            break
+        
+        # Get latest ML prediction
+        ml_predictions = db.collection("users").document(user_id).collection("ml_predictions").order_by("createdAt", direction=firestore.Query.DESCENDING).limit(1).stream()
+        ml_prediction = None
+        for doc in ml_predictions:
+            ml_prediction = doc.to_dict()
+            ml_prediction["id"] = doc.id
+            break
+        
+        # Get latest combined prediction
+        combined_predictions = db.collection("users").document(user_id).collection("combined_predictions").order_by("createdAt", direction=firestore.Query.DESCENDING).limit(1).stream()
+        combined_prediction = None
+        for doc in combined_predictions:
+            combined_prediction = doc.to_dict()
+            combined_prediction["id"] = doc.id
+            break
+        
+        return {
+            "success": True,
+            "chatgptPrediction": chatgpt_prediction,
+            "mlPrediction": ml_prediction,
+            "combinedPrediction": combined_prediction
+        }
+    
+    except Exception as e:
+        raise https_fn.HttpsError(
+            code=https_fn.FunctionsErrorCode.INTERNAL,
+            message=f"Error getting latest predictions: {str(e)}"
+        )