File tree Expand file tree Collapse file tree 2 files changed +62
-0
lines changed
Expand file tree Collapse file tree 2 files changed +62
-0
lines changed Original file line number Diff line number Diff line change 1+ import fitz # PyMuPDF
2+ import openai
3+ import os
4+ from dotenv import load_dotenv
5+
# Pull settings from a local .env file (via python-dotenv) into the process
# environment, then build the module-wide OpenAI client from OPENAI_API_KEY.
load_dotenv()
API_KEY = os.environ.get("OPENAI_API_KEY")
client = openai.OpenAI(api_key=API_KEY)
10+
# System prompt sent with every extraction request. Kept at module level so
# the function body stays readable; the text is part of runtime behavior.
_EXTRACTION_PROMPT = """Extract and format the following information from this document into JSON:
- Course Name
- Instructor Name
- Grade Weight
- Assignment Names
- GPA (if applicable)
- Final Grade
- Due Dates
- Credit Hours

Return the data in this JSON format:
{
"course_name": "...",
"instructor": "...",
"grade_weights": [{"name": "...", "weight": "..."}],
"assignments": ["...", "..."],
"gpa": "...",
"final_grade": "...",
"due_dates": [{"assignment": "...", "due_date": "..."}],
"credit_hours": "..."
}
"""


def extract_and_process_pdf(pdf_file, *, model: str = "gpt-4") -> str:
    """Extract text from an uploaded PDF and have OpenAI structure it.

    Args:
        pdf_file: A binary file-like object; its ``read()`` result is handed
            to PyMuPDF as an in-memory PDF stream.
        model: Chat-completions model name. Defaults to ``"gpt-4"``, the
            value that was previously hard-coded.

    Returns:
        The message content of the first completion choice, as a string.
        This is the raw model output: it is NOT parsed or validated as JSON
        here, so callers should ``json.loads`` it defensively.

    Raises:
        ValueError: If no text could be extracted from the PDF, or if the
            model response carries no text content.
    """
    # Open the PDF from memory; the context manager guarantees the document
    # is closed even if text extraction raises part-way through.
    with fitz.open(stream=pdf_file.read(), filetype="pdf") as doc:
        text = "\n ".join(page.get_text("text") for page in doc)

    if not text.strip():
        raise ValueError("No text extracted from the PDF.")

    # Ask the model to turn the extracted text into the JSON structure
    # described by the system prompt.
    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": _EXTRACTION_PROMPT},
            {"role": "user", "content": text},
        ],
    )

    structured_data = response.choices[0].message.content
    # The API types the content as optional (e.g. refusals or tool calls);
    # fail loudly instead of handing None to callers that expect a string.
    if structured_data is None:
        raise ValueError("OpenAI response contained no text content.")
    return structured_data  # JSON-like extracted data
54+
Original file line number Diff line number Diff line change 1+ from grade_processing import extract_and_process_pdf
2+
# Run the extractor against a sample syllabus PDF and show what came back.
SAMPLE_PDF = "P_Cubed_Syllabus_SPRING2025.pdf"

with open(SAMPLE_PDF, "rb") as pdf_handle:
    structured_data = extract_and_process_pdf(pdf_handle)

print("\n 🔹 Extracted Data:", structured_data)
8+
You can’t perform that action at this time.
0 commit comments