Skip to content

Commit dc3761c

Browse files
authored
Merge pull request #3 from MSU-AI/consolidate-functions
Consolidate functions
2 parents d977372 + e5c7f36 commit dc3761c

File tree

5 files changed

+62
-3
lines changed

5 files changed

+62
-3
lines changed

backend/.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
"P_Cubed_Syllabus_SPRING2025.pdf"
2+
"pdf_parser.py"
365 KB
Binary file not shown.

backend/grade_processing.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import fitz # PyMuPDF
22
import openai
33
import os
4+
import json
45
from dotenv import load_dotenv
56

67
# Load API Key
@@ -49,6 +50,12 @@ def extract_and_process_pdf(pdf_file):
4950
]
5051
)
5152

52-
structured_data = response.choices[0].message.content
53-
return structured_data # JSON-like extracted data
53+
# Ensure response is formatted as JSON
54+
try:
55+
structured_data = json.loads(response.choices[0].message.content)
56+
except json.JSONDecodeError:
57+
raise ValueError("GPT response was not in valid JSON format.")
58+
59+
return structured_data # Return a dictionary, not a string
60+
5461

backend/pdf_parser.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
import fitz # PyMuPDF
2+
import os
3+
import openai
4+
from dotenv import load_dotenv
5+
6+
7+
# Load variables from a local .env file (if any) into the process environment
# before the key lookup below.
load_dotenv()

# OpenAI credential read from the environment; None when the variable is unset.
API_KEY = os.environ.get("OPENAI_API_KEY")

# Module-wide OpenAI client used by process_text_with_openai.
client = openai.OpenAI(api_key=API_KEY)
10+
11+
def extract_text_from_pdf(pdf_path):
    """Return the plain text of every page of the PDF at ``pdf_path``.

    Pages are read in document order and joined with newlines.

    Args:
        pdf_path: Filesystem path of the PDF to read.

    Returns:
        The concatenated text of all pages.

    Raises:
        FileNotFoundError: If ``pdf_path`` does not exist. The message carries
            the absolute path so a wrong working directory is easy to spot.
        ValueError: If the extracted text is empty or whitespace only.
    """
    if not os.path.exists(pdf_path):
        raise FileNotFoundError(f"File not found: {os.path.abspath(pdf_path)}")

    # Open through the context manager so the document handle is released
    # even when extraction fails or the empty-text error below is raised.
    with fitz.open(pdf_path) as doc:
        text = "\n".join(page.get_text("text") for page in doc)

    if not text.strip():
        raise ValueError("No text extracted from the PDF. Check if the file is scanned or empty.")
    return text
21+
22+
def process_text_with_openai(text, model="gpt-4"):
    """Ask an OpenAI chat model to extract grades and weights from ``text``.

    Args:
        text: Raw text to analyse; sent verbatim as the user message.
        model: Name of the chat model to call. Defaults to "gpt-4", the value
            that was previously hard-coded; "gpt-3.5-turbo" is the alternative
            the original inline comment suggested.

    Returns:
        The ``content`` of the first choice's message from the API response.
    """
    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": "Extract grades and weights from this text."},
            {"role": "user", "content": text},
        ],
    )
    return response.choices[0].message.content
31+
32+
if __name__ == "__main__":
    # Manual smoke run: the sample syllabus is looked up relative to the
    # current working directory.
    sample_pdf = "P_Cubed_Syllabus_SPRING2025.pdf"

    try:
        extracted = extract_text_from_pdf(sample_pdf)
        print("PDF Text Extraction Successful!")
        result = process_text_with_openai(extracted)
        print("\nExtracted Data from OpenAI:\n", result)
    except (FileNotFoundError, ValueError) as err:
        # Both expected failures are reported by printing the message as-is.
        print(err)
    except Exception as err:
        # Anything else (e.g. API or network errors) is reported, not re-raised.
        print(f"Unexpected Error: {err}")

backend/test_script.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,12 @@
11
from grade_processing import extract_and_process_pdf
2+
import json
23

34
# Open a sample PDF file
45
with open("P_Cubed_Syllabus_SPRING2025.pdf", "rb") as file:
56
structured_data = extract_and_process_pdf(file)
67

7-
print("\n🔹 Extracted Data:", structured_data)
8+
# Print formatted JSON output
9+
print("\n🔹 Extracted Data (JSON Format):")
10+
print(json.dumps(structured_data, indent=4))
11+
812

0 commit comments

Comments
 (0)