Merge pull request #2 from MSU-AI/consolidate-functions

WCarey34 · web-flow · commit 928b5d6f449e · 2025-03-17T17:44:41.000-04:00
Consolidate functions
diff --git a/backend/grade_processing.py b/backend/grade_processing.py
@@ -0,0 +1,54 @@
+import fitz  # PyMuPDF
+import openai
+import os
+from dotenv import load_dotenv
+
+# Read OPENAI_API_KEY (load_dotenv() runs first, so a .env file can supply it) and build one shared client.
+load_dotenv()
+API_KEY = os.getenv("OPENAI_API_KEY")
+client = openai.OpenAI(api_key=API_KEY)
+
+def extract_and_process_pdf(pdf_file):
+    """Extract text from an uploaded PDF and ask OpenAI to structure it.
+
+    pdf_file: binary file-like object; only its read() method is used.
+    Returns the model's reply text (asked to be JSON; not parsed here).
+    Raises ValueError if no text could be extracted from the PDF.
+    """
+    # The with-block closes the PyMuPDF document even if extraction fails.
+    with fitz.open(stream=pdf_file.read(), filetype="pdf") as doc:
+        text = "\n".join(page.get_text("text") for page in doc)
+    if not text.strip():
+        raise ValueError("No text extracted from the PDF.")
+
+    response = client.chat.completions.create(
+        model="gpt-4",
+        messages=[
+            {"role": "system", "content":
+             """Extract and format the following information from this document into JSON:
+             - Course Name
+             - Instructor Name
+             - Grade Weight
+             - Assignment Names
+             - GPA (if applicable)
+             - Final Grade
+             - Due Dates
+             - Credit Hours
+             
+             Return the data in this JSON format:
+             {
+                "course_name": "...",
+                "instructor": "...",
+                "grade_weights": [{"name": "...", "weight": "..."}],
+                "assignments": ["...", "..."],
+                "gpa": "...",
+                "final_grade": "...",
+                "due_dates": [{"assignment": "...", "due_date": "..."}],
+                "credit_hours": "..."
+             }
+             """},
+            {"role": "user", "content": text}
+        ]
+    )
+    return response.choices[0].message.content
+
diff --git a/backend/test_script.py b/backend/test_script.py
@@ -0,0 +1,8 @@
+from grade_processing import extract_and_process_pdf
+
+# Manual smoke test; guarded so importing this module (e.g. during test discovery) does not run it.
+if __name__ == "__main__":
+    with open("P_Cubed_Syllabus_SPRING2025.pdf", "rb") as file:
+        structured_data = extract_and_process_pdf(file)
+    print("\n🔹 Extracted Data:", structured_data)
+