Skip to content

Commit 95d58f7

Browse files
authored
Merge pull request #147 from gwydion67/master
Fetch the latest Academic Calendar, parse its data and generate the ics file
2 parents d2149a8 + 0837f38 commit 95d58f7

File tree

6 files changed

+268
-7
lines changed

6 files changed

+268
-7
lines changed

.gitignore

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,4 +8,8 @@ data.txt
88
.idea/
99
.vscode
1010
venv
11-
.env
11+
.env
12+
13+
ACADEMIC_CALENDAR_*.pdf
14+
Academic_Cal-j/**
15+
final.json

requirements.txt

Lines changed: 52 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,58 @@
11
beautifulsoup4==4.12.2
2-
google_api_python_client==2.90.0
2+
blinker==1.8.2
3+
bs4==0.0.2
4+
cachetools==5.5.0
5+
certifi==2024.8.30
6+
cffi==1.17.1
7+
chardet==5.2.0
8+
charset-normalizer==3.4.0
9+
click==8.1.7
10+
cryptography==43.0.1
11+
et-xmlfile==1.1.0
12+
Flask==3.0.3
13+
Flask-Cors==4.0.1
14+
ghostscript==0.7
15+
google-api-core==2.21.0
16+
google-api-python-client==2.90.0
17+
google-auth==2.35.0
18+
google-auth-httplib2==0.2.0
19+
google-auth-oauthlib==1.2.1
20+
googleapis-common-protos==1.65.0
21+
gunicorn==22.0.0
322
httplib2==0.22.0
423
icalendar==5.0.7
24+
idna==3.10
525
iitkgp_erp_login==2.4.2
26+
itsdangerous==2.2.0
27+
Jinja2==3.1.4
28+
MarkupSafe==3.0.1
29+
numpy==2.1.2
630
oauth2client==4.1.3
31+
oauthlib==3.2.2
32+
opencv-python==4.10.0.84
33+
openpyxl==3.1.5
34+
packaging==24.1
35+
pandas==2.2.3
36+
pdfminer.six==20240706
37+
proto-plus==1.24.0
38+
protobuf==5.28.2
39+
pyasn1==0.6.1
40+
pyasn1_modules==0.4.1
41+
pycparser==2.22
42+
pyparsing==3.2.0
43+
pypdf==4.3.1
44+
pypdf_table_extraction==0.0.2
45+
python-dateutil==2.9.0.post0
746
pytz==2023.3
8-
Requests==2.31.0
9-
flask==3.0.3
10-
flask_cors==4.0.1
11-
gunicorn==22.0.0
47+
requests==2.31.0
48+
requests-oauthlib==2.0.0
49+
rsa==4.9
50+
setuptools==75.1.0
51+
six==1.16.0
52+
soupsieve==2.6
53+
tabulate==0.9.0
54+
tk==0.1.0
55+
tzdata==2024.2
56+
uritemplate==4.1.1
57+
urllib3==2.2.3
58+
Werkzeug==3.0.4

requirements.txt.bak

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
beautifulsoup4==4.12.2
2+
google_api_python_client==2.90.0
3+
httplib2==0.22.0
4+
icalendar==5.0.7
5+
iitkgp_erp_login==2.4.2
6+
oauth2client==4.1.3
7+
pytz==2023.3
8+
Requests==2.31.0
9+
flask==3.0.3
10+
flask_cors==4.0.1
11+
gunicorn==22.0.0

timetable/generate_ics.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
from icalendar import Calendar, Event
44
from datetime import datetime, timedelta
55
from timetable import Course
6-
from utils import dates, build_event_duration, generate_india_time, next_weekday
6+
from utils import academic_calander_handler, dates, build_event_duration, generate_india_time, next_weekday
77

88
WORKING_DAYS = dates.get_dates()
99

@@ -51,6 +51,13 @@ def generate_ics(courses: list[Course], output_filename):
5151
event.add("dtstart", holiday[1])
5252
event.add("dtend", holiday[1] + timedelta(days=1))
5353
cal.add_component(event)
54+
55+
for entry in academic_calander_handler.get_academic_calendar():
56+
event = Event()
57+
event.add("summary", entry.event)
58+
event.add("dtstart",entry.start_date)
59+
event.add("dtend",entry.end_date)
60+
cal.add_component(event)
5461

5562

5663
if output_filename != "":

utils/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
from utils.dates import *
22
from utils.build_event import *
3+
from utils.academic_calander_handler import *
34

utils/academic_calander_handler.py

Lines changed: 191 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,191 @@
1+
from datetime import datetime, timedelta
2+
import glob
3+
import camelot
4+
import os
5+
import requests
6+
import shutil
7+
from zipfile import ZipFile
8+
import json
9+
from dataclasses import dataclass
10+
import re
11+
12+
13+
JSON_FOLDER_NAME = 'Academic_Cal-j'
14+
15+
@dataclass
class DataEntry:
    """One academic-calendar event with its start and end timestamps.

    ``start_date`` and ``end_date`` default to the moment the instance is
    created. They used to be written as ``datetime.today()`` class-level
    defaults, which Python evaluates only once (at import time), so every
    default instance shared the same stale timestamp.
    """

    # ``None`` is only a placeholder; ``__post_init__`` swaps it for "now".
    start_date: datetime = None
    end_date: datetime = None
    # Human-readable event description.
    event: str = ""

    def __post_init__(self):
        """Fill in missing dates with the current local time."""
        now = datetime.today()
        if self.start_date is None:
            self.start_date = now
        if self.end_date is None:
            self.end_date = now
20+
21+
#get the current working directory
22+
def cwd():
    """Return the current working directory of the running process."""
    current_dir = os.getcwd()
    return current_dir
24+
25+
def get_latest_calendar_name(today=None):
    """Return the PDF file name of the academic calendar for the current session.

    The name has the form ``ACADEMIC_CALENDAR_<YYYY>_<YY>.pdf`` where
    ``YYYY`` is the year the session starts and ``YY`` the last two digits
    of the following year.

    Args:
        today: Optional ``date``/``datetime`` to compute the name for.
            Defaults to the current local date.

    Returns:
        The file name as a string, e.g. ``ACADEMIC_CALENDAR_2024_25.pdf``.
    """
    if today is None:
        # Read the clock once so year and month always belong together.
        today = datetime.today()

    # Before July the running session is the one that started last year.
    start_year = today.year if today.month >= 7 else today.year - 1

    # Always two digits: 2008 -> "09" (not "9"), 2099 -> "00" (not "100").
    end_suffix = (start_year + 1) % 100
    return f'ACADEMIC_CALENDAR_{start_year}_{end_suffix:02d}.pdf'
35+
36+
def is_file_present(file):
    """Return True if *file* (a file or a directory) exists.

    Relative names are resolved against the current working directory;
    absolute paths are checked as given.

    Args:
        file: File or directory name/path to look for.

    Returns:
        ``True`` when the path exists, ``False`` otherwise.
    """
    # os.path.join keeps absolute paths intact and uses the platform's
    # separator. A single exists() call covers both files and directories,
    # so the former second check with a trailing '/' was redundant.
    return os.path.exists(os.path.join(os.getcwd(), file))
42+
43+
def delete_file(file):
    """Delete a file or a whole directory tree.

    Relative names are resolved against the current working directory.
    The same resolved path is used both for the type check and for the
    removal (previously the check used the bare name while the directory
    removal used the cwd-joined path).

    Args:
        file: Name/path of the file or directory to remove.

    Returns:
        ``True`` if something was deleted, ``False`` if the path did not
        exist or could not be removed. Errors are printed, not raised.
    """
    path = os.path.join(os.getcwd(), file)

    if not os.path.exists(path):
        print(file, "File not present..")
        return False

    print("DELETING file ", file)
    try:
        if os.path.isdir(path):
            shutil.rmtree(path)
        else:
            os.remove(path)
    except OSError as e:
        print("ERROR: seems file already exists but cannot be deleted")
        print(e)
        return False
    return True
59+
60+
#fetch the latest academic calendar from the iitkgp website
61+
def get_latest_calendar():
    """Download the current academic-calendar PDF into the working directory.

    The file name comes from ``get_latest_calendar_name()`` and is fetched
    from ``https://www.iitkgp.ac.in/assets/pdf/``.

    Returns:
        ``True`` if the PDF was downloaded and is present on disk,
        ``False`` if the request failed (network error or non-2xx status).
        On failure any previously downloaded copy is left untouched.
    """
    filename = get_latest_calendar_name()
    url = 'https://www.iitkgp.ac.in/assets/pdf/' + filename

    # Fetch first: the old code opened the target file before the request
    # and never checked the status, so an HTTP error page (or an empty
    # file) could end up saved as the "PDF".
    try:
        response = requests.get(url, timeout=30)
        response.raise_for_status()
    except requests.RequestException as e:
        print("ERROR: could not download", url)
        print(e)
        return False

    # Delete any old academic calendar PDF only once new content is in hand.
    if is_file_present(filename):
        delete_file(filename)

    with open(filename, "wb") as file:
        file.write(response.content)

    return is_file_present(filename)
77+
78+
def upzip_and_delete_zip(zip_file_name, result_folder_name):
    """Extract a zip archive into a folder, then delete the archive.

    Args:
        zip_file_name: Path of the zip archive to extract.
        result_folder_name: Directory the archive contents are extracted to.

    Returns:
        ``True`` if extraction succeeded, ``False`` if the archive could
        not be opened or extracted (the error is printed and the archive
        is left in place).
    """
    # Opening the archive is inside the try as well: a missing or corrupt
    # zip used to raise out of this function instead of returning False.
    # The exception type stays broad because zipfile can raise several
    # unrelated error types here.
    try:
        with ZipFile(zip_file_name) as archive:
            archive.extractall(result_folder_name)
    except Exception as E:
        print(E)
        return False

    # The archive is closed at this point, so removing it cannot collide
    # with an open file handle.
    print("Zip File not needed anymore, Deleteting ", zip_file_name)
    delete_file(zip_file_name)
    return True
89+
90+
def export_json():
    """Extract every table of the calendar PDF into JSON files.

    Reads the PDF named by ``get_latest_calendar_name()`` with camelot,
    removes any previous ``JSON_FOLDER_NAME`` folder, exports the tables as
    a compressed JSON bundle and unpacks that bundle into
    ``JSON_FOLDER_NAME``.

    Returns:
        ``True`` if the tables were read, exported and unpacked,
        ``False`` if any of those steps failed (the error is printed).
    """
    filename = get_latest_calendar_name()

    # A missing or unreadable PDF used to raise out of this function.
    # NOTE: some linters report camelot.read_pdf as "not found"; ignore that.
    try:
        tables = camelot.read_pdf(filename, pages="all")
    except Exception as E:
        print(E)
        return False

    print("Checking for pre-existing folder")
    delete_file(JSON_FOLDER_NAME)

    try:
        tables.export((JSON_FOLDER_NAME + '.json'), f='json', compress=True)
    except Exception as E:
        print(E)
        return False

    # Propagate the unzip result instead of unconditionally returning True.
    return upzip_and_delete_zip((JSON_FOLDER_NAME + '.zip'), JSON_FOLDER_NAME)
106+
107+
def _natural_sort_key(path):
    """Sort key that orders embedded numbers numerically (page-2 < page-10)."""
    # re.split with a capturing group alternates text and digit runs, so
    # the same positions always hold the same type and compare cleanly.
    return [
        int(part) if part.isdigit() else part
        for part in re.split(r'(\d+)', os.path.basename(path))
    ]


def get_json_files():
    """Return the paths of all JSON files inside ``JSON_FOLDER_NAME``.

    The paths are returned in natural (page/table number) order. ``glob``
    yields files in arbitrary filesystem order, while the code consuming
    the merged data depends on row order (it drops the first row and stops
    at a specific row).

    Returns:
        A list of file paths, or an empty list if the folder is missing.
    """
    folder_path = os.path.join(os.getcwd(), JSON_FOLDER_NAME)
    if not os.path.isdir(folder_path):
        return []

    files = glob.glob(folder_path + '/*.json', include_hidden=True)
    return sorted(files, key=_natural_sort_key)
114+
115+
def merge_json():
    """Concatenate the contents of every exported JSON file.

    Each file returned by ``get_json_files()`` is loaded and its items are
    appended, in order, to one combined list. The combined list is also
    written to ``final.json`` in the working directory.

    Returns:
        The combined list.
    """
    combined = []
    for json_path in get_json_files():
        with open(json_path) as handle:
            combined += json.load(handle)

    with open('final.json', "w") as out:
        json.dump(combined, out, indent=4)

    return combined
126+
127+
def _cell(row, key):
    """Return column *key* of a table row as a string ('' when absent)."""
    value = row.get(key, '')
    return '' if value is None else str(value)


def _rows_to_entries(rows):
    """Turn merged table rows into ``DataEntry`` objects.

    Only rows with more than four columns and a non-empty column '4' are
    considered. Column '1' (when longer than three characters) and column
    '2' form the event text; dd.mm.yyyy dates are searched in columns '3'
    and '4'. Processing stops after the row whose column '1' looks like
    "Annual Convocation".
    """
    # Dots are escaped so that only literal dd.mm.yyyy strings match;
    # an unescaped '.' matches any character.
    date_regex = re.compile(r'\d{2}\.\d{2}\.\d{4}')
    entries = []

    for row in rows:
        if len(row) <= 4 or _cell(row, '4') == '':
            continue

        title = _cell(row, '1')
        event = ''
        if len(title) > 3:
            event += title.replace('\n', '')
        event += _cell(row, '2').replace('\n', '')

        date_text = (
            _cell(row, '3').replace('\n', ' ').replace('(AN)', '')
            + _cell(row, '4').replace('\n', ' ').replace('(AN)', '')
        )
        try:
            found = [
                datetime.strptime(match, "%d.%m.%Y")
                for match in date_regex.findall(date_text)
            ]
        except ValueError:
            # Looks like a date but is not a real one (e.g. 31.02.2025).
            found = []

        if len(found) in (1, 2):
            # The end is exclusive: one day after the last listed day, for
            # single dates and ranges alike. Ranges used to end at midnight
            # of their last day, which cut that day off.
            entries.append(DataEntry(
                start_date=found[0],
                end_date=found[-1] + timedelta(days=1),
                event=event,
            ))
        else:
            # Such rows used to be emitted with the default "today" dates,
            # producing bogus events; skip them instead.
            print("Skipping calendar row without a usable date:", event)

        # The calendar may misspell "Annual Convocation", so accept any
        # two-word title containing either word. This only reduces the
        # number of places where the stop condition can fail.
        words = title.strip().lower().split(" ")
        if len(words) == 2 and ("annual" in words or "convocation" in words):
            break

    return entries


def get_academic_calendar() -> list[DataEntry]:
    """Download, parse and return the events of the latest academic calendar.

    Downloads the calendar PDF, exports its tables to JSON, merges them and
    converts the rows into ``DataEntry`` objects.

    Returns:
        The parsed events in table order. ``end_date`` is exclusive (one
        day after the last day of the event).
    """
    get_latest_calendar()
    export_json()

    # The first merged row is dropped, as before (presumably the table
    # header - TODO confirm against a real export).
    all_dates = merge_json()[1:]

    return _rows_to_entries(all_dates)
191+

0 commit comments

Comments
 (0)