Merge pull request #3 from MSU-AI/consolidate-functions

WCarey34 · web-flow · commit dc3761cb6cb4 · 2025-03-17T18:12:02.000-04:00
Consolidate functions
diff --git a/backend/.gitignore b/backend/.gitignore
@@ -0,0 +1,2 @@
+"P_Cubed_Syllabus_SPRING2025.pdf" 
+"pdf_parser.py" 
diff --git a/backend/P_Cubed_Syllabus_SPRING2025.pdf b/backend/P_Cubed_Syllabus_SPRING2025.pdf
diff --git a/backend/grade_processing.py b/backend/grade_processing.py
@@ -1,6 +1,7 @@
 import fitz  # PyMuPDF
 import openai
 import os
+import json
 from dotenv import load_dotenv
 
 # Load API Key
@@ -49,6 +50,12 @@ def extract_and_process_pdf(pdf_file):
         ]
     )
 
-    structured_data = response.choices[0].message.content
-    return structured_data  # JSON-like extracted data
+    # Ensure response is formatted as JSON
+    try:
+        structured_data = json.loads(response.choices[0].message.content)
+    except json.JSONDecodeError:
+        raise ValueError("GPT response was not in valid JSON format.")
+
+    return structured_data  # Return a dictionary, not a string
+
 
diff --git a/backend/pdf_parser.py b/backend/pdf_parser.py
@@ -0,0 +1,46 @@
+import fitz  # PyMuPDF
+import os
+import openai
+from dotenv import load_dotenv
+
+
+load_dotenv()
+API_KEY = os.getenv("OPENAI_API_KEY")
+client = openai.OpenAI(api_key=API_KEY)
+
+def extract_text_from_pdf(pdf_path):
+    if not os.path.exists(pdf_path):
+        raise FileNotFoundError(f"File not found: {os.path.abspath(pdf_path)}")
+
+    doc = fitz.open(pdf_path)
+    text = "\n".join([page.get_text("text") for page in doc])
+    
+    if not text.strip():
+        raise ValueError("No text extracted from the PDF. Check if the file is scanned or empty.")
+    return text
+
+def process_text_with_openai(text):
+    response = client.chat.completions.create(
+        model="gpt-4",  # or "gpt-3.5-turbo"
+        messages=[
+            {"role": "system", "content": "Extract grades and weights from this text."},
+            {"role": "user", "content": text}
+        ]
+    )
+    return response.choices[0].message.content
+
+if __name__ == "__main__":
+    
+    pdf_path = "P_Cubed_Syllabus_SPRING2025.pdf"
+
+    try:
+        pdf_text = extract_text_from_pdf(pdf_path)
+        print("PDF Text Extraction Successful!")
+        structured_data = process_text_with_openai(pdf_text)
+        print("\nExtracted Data from OpenAI:\n", structured_data)
+    except FileNotFoundError as e:
+        print(e)
+    except ValueError as e:
+        print(e)
+    except Exception as e:
+        print(f"Unexpected Error: {e}")
diff --git a/backend/test_script.py b/backend/test_script.py
@@ -1,8 +1,12 @@
 from grade_processing import extract_and_process_pdf
+import json
 
 # Open a sample PDF file
 with open("P_Cubed_Syllabus_SPRING2025.pdf", "rb") as file:
     structured_data = extract_and_process_pdf(file)
 
-print("\n🔹 Extracted Data:", structured_data)
+# Print formatted JSON output
+print("\n🔹 Extracted Data (JSON Format):")
+print(json.dumps(structured_data, indent=4))
+
 

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+"P_Cubed_Syllabus_SPRING2025.pdf"`
	`2`	`+"pdf_parser.py"`