|
from dataclasses import dataclass, field
from datetime import datetime, timedelta
import glob
import json
import os
import re
import shutil
from zipfile import ZipFile

import camelot
import requests
| 11 | + |
| 12 | + |
# Base name used for the exported JSON artefacts: export_json() exports to
# '<name>.json' (compressed), unzips '<name>.zip', and extracts into '<name>/'.
JSON_FOLDER_NAME = 'Academic_Cal-j'
| 14 | + |
@dataclass
class DataEntry:
    """One calendar event together with the date span it covers.

    ``start_date`` and ``end_date`` default to the moment the instance is
    created; ``event`` defaults to an empty string.
    """
    # default_factory is required here: a plain ``= datetime.today()`` default
    # is evaluated once when the class is defined, so every instance created
    # later would silently share that stale import-time timestamp.
    start_date: datetime = field(default_factory=datetime.today)
    end_date: datetime = field(default_factory=datetime.today)
    event: str = ""
| 20 | + |
# directory the process is currently running in
def cwd():
    """Return the current working directory as reported by ``os.getcwd``."""
    current_dir = os.getcwd()
    return current_dir
| 24 | + |
def get_latest_calendar_name(today=None):
    """Build the file name of the academic-calendar PDF for a given date.

    Args:
        today: Date used to pick the calendar. Defaults to
            ``datetime.today()`` when omitted, which is the original
            behaviour.

    Returns:
        A name of the form ``ACADEMIC_CALENDAR_<YYYY>_<YY>.pdf`` where
        ``<YYYY>`` is the starting year and ``<YY>`` the two-digit following
        year.
    """
    if today is None:
        # Read the clock once so year and month come from the same instant.
        today = datetime.today()

    # Months before July are attributed to the calendar that started in the
    # previous year.
    start_year = today.year - 1 if today.month < 7 else today.year

    # Take the modulo *after* adding one and zero-pad, so 2099 -> "00" and
    # 2008 -> "09" instead of "100" and "9".
    end_suffix = (start_year + 1) % 100
    return f'ACADEMIC_CALENDAR_{start_year}_{end_suffix:02d}.pdf'
| 35 | + |
def is_file_present(file):
    """Return True if ``file`` exists as a file or a directory.

    Args:
        file: Path to check. Relative paths are resolved against the current
            working directory; absolute paths are used as given.

    Returns:
        bool: whether the path exists.
    """
    # os.path.join keeps an absolute ``file`` intact (plain concatenation
    # would produce '<cwd>//abs/path'). A single exists() call already covers
    # directories, so no separate trailing-slash probe is needed.
    return os.path.exists(os.path.join(os.getcwd(), file))
| 42 | + |
def delete_file(file):
    """Delete a file or a whole directory tree.

    Args:
        file: Path to remove. Relative paths are resolved against the current
            working directory.

    Returns:
        bool: True if the path existed and was removed; False if it was not
        present or could not be removed (the error is printed, not raised).
    """
    # Resolve once so the existence check, the type check and the removal all
    # look at the same path.
    target = os.path.join(os.getcwd(), file)

    if not os.path.exists(target):
        print(file, "File not present..")
        return False

    print("DELETING file ",file)
    try:
        if os.path.isdir(target):
            shutil.rmtree(target)
        elif os.path.isfile(target):
            os.remove(target)
        else:
            # Exists but is neither a regular file nor a directory.
            raise OSError("filename not valid")
    except OSError as e:
        print("ERROR: seems file already exists but cannot be deleted")
        print(e)
        return False

    return True
| 59 | + |
# fetch the latest academic calendar from the iitkgp website
def get_latest_calendar():
    """Download the current academic-calendar PDF into the working directory.

    The file name comes from ``get_latest_calendar_name()`` and is fetched
    from ``https://www.iitkgp.ac.in/assets/pdf/``.

    Returns:
        bool: True if the PDF was downloaded and is present on disk, False if
        the request failed or returned an error status (the error is
        printed). On failure any previously downloaded copy is left untouched.
    """
    filename = get_latest_calendar_name()
    url = 'https://www.iitkgp.ac.in/assets/pdf/' + filename

    # Download before touching the disk: opening the output file first would
    # leave an empty file behind on a network error, and an unchecked 404
    # page would be saved as if it were the PDF.
    try:
        response = requests.get(url, timeout=30)
        response.raise_for_status()
    except requests.RequestException as e:
        print("ERROR: could not download ", url)
        print(e)
        return False

    ## delete any old academic calendar pdf if it exists
    if is_file_present(filename):
        delete_file(filename)

    with open(filename, "wb") as file:
        file.write(response.content)

    return is_file_present(filename)
| 77 | + |
def upzip_and_delete_zip(zip_file_name,result_folder_name):
    """Extract a zip archive into a folder, then delete the archive.

    Args:
        zip_file_name: Path of the archive to extract.
        result_folder_name: Directory the contents are extracted into.

    Returns:
        bool: True if extraction succeeded (the archive is then deleted),
        False if the archive could not be opened or extracted. The error is
        printed and the archive is left in place.
    """
    # Opening the archive is inside the try as well: a missing or corrupt zip
    # fails in the ZipFile constructor, and that must yield False like any
    # other extraction failure instead of escaping as an exception.
    try:
        with ZipFile(zip_file_name) as archive:
            archive.extractall(result_folder_name)
    except Exception as E:
        # Deliberately broad: this is a best-effort step that reports failure
        # through its return value.
        print(E)
        return False

    print("Zip File not needed anymore, Deleteting ", zip_file_name)
    delete_file(zip_file_name)
    return True
| 89 | + |
def export_json():
    """Extract the tables of the calendar PDF into a folder of JSON files.

    Reads the PDF named by ``get_latest_calendar_name()`` with camelot,
    removes any previous ``JSON_FOLDER_NAME`` folder, exports the tables as a
    compressed JSON archive and unpacks it into ``JSON_FOLDER_NAME``.

    Returns:
        bool: True if both the export and the extraction succeeded, False
        otherwise (the error is printed).
    """
    filename = get_latest_calendar_name()
    ## ignore the read_pdf not found warning
    tables = camelot.read_pdf(filename, pages="all")

    print("Checking for pre-existing folder")
    delete_file(JSON_FOLDER_NAME)

    try:
        tables.export((JSON_FOLDER_NAME + '.json'), f='json', compress=True)
    except Exception as E:
        print(E)
        return False

    # Propagate the extraction result; unconditionally returning True here
    # would hide a failed unzip from the caller.
    return upzip_and_delete_zip((JSON_FOLDER_NAME + '.zip'), JSON_FOLDER_NAME)
| 106 | + |
def get_json_files():
    """List the exported JSON files in a stable, numeric-aware order.

    Returns:
        list[str]: Paths of every ``*.json`` file inside
        ``<cwd>/JSON_FOLDER_NAME``, or an empty list if that folder does not
        exist.
    """
    if not is_file_present(JSON_FOLDER_NAME):
        return []

    folder_path = cwd() + '/' + JSON_FOLDER_NAME
    files = glob.glob(folder_path + '/*.json', include_hidden=True)

    def natural_key(path):
        # Split the base name into text and digit runs so that numbers are
        # compared as integers ('...-2...' before '...-10...'). The pieces
        # always alternate text/number, so str is never compared with int.
        pieces = re.split(r'(\d+)', os.path.basename(path))
        return [int(piece) if piece.isdigit() else piece for piece in pieces]

    # glob returns matches in arbitrary order; the caller merges the files
    # sequentially and depends on row order, so sort deterministically.
    return sorted(files, key=natural_key)
| 114 | + |
def merge_json():
    """Concatenate every exported JSON file into one list and save it.

    Each file returned by ``get_json_files()`` is loaded and its items are
    appended, in order, to a single list. That list is written to
    ``final.json`` (indented by 4) in the working directory and returned.
    """
    combined = []
    for json_path in get_json_files():
        with open(json_path) as source:
            combined += json.load(source)

    with open('final.json', "w") as sink:
        json.dump(combined, sink, indent=4)

    return combined
| 126 | + |
def get_academic_calendar() -> list[DataEntry]:
    """Download the calendar PDF and turn its table rows into ``DataEntry`` items.

    Runs the full pipeline: ``get_latest_calendar()``, ``export_json()`` and
    ``merge_json()``, then parses each merged row.

    A row is used only if it has more than four columns and a non-empty
    column ``'4'``. The event text is column ``'1'`` (only when longer than
    three characters) followed by column ``'2'``, newlines removed. Dates in
    ``dd.mm.yyyy`` form are searched in columns ``'3'`` and ``'4'``:

    * one date: ``start_date`` is that date, ``end_date`` is one day later;
    * two dates: first is ``start_date``, second is ``end_date``.

    Parsing stops after the row whose column ``'1'`` is a two-word label
    containing "annual" or "convocation".

    Returns:
        list[DataEntry]: the parsed entries, in row order.
    """
    get_latest_calendar()
    export_json()

    # The first merged row is dropped (presumably a header row - TODO confirm).
    all_dates = merge_json()[1:]

    # The dots are escaped so only dot-separated dates match; with a bare '.'
    # a string such as '12/05/2023' would match and then make strptime raise.
    date_regex = re.compile(r'\d{2}\.\d{2}\.\d{4}')
    date_format = "%d.%m.%Y"

    main_dates = []
    for date in all_dates:
        if len(date) <= 4 or date['4'] == '':
            continue

        entry = DataEntry()
        if len(date['1']) > 3:
            entry.event += date['1'].replace('\n', '')
        entry.event += date['2'].replace('\n', '')

        # Dates may sit in either column; '(AN)' markers are stripped first.
        raw_dates = (
            date['3'].replace('\n', ' ').replace('(AN)', '')
            + date['4'].replace('\n', ' ').replace('(AN)', '')
        )
        found = date_regex.findall(raw_dates)

        if len(found) == 1:
            entry.start_date = datetime.strptime(found[0], date_format)
            entry.end_date = entry.start_date + timedelta(1)
        elif len(found) == 2:
            entry.start_date = datetime.strptime(found[0], date_format)
            entry.end_date = datetime.strptime(found[1], date_format)
        # NOTE(review): rows with zero or more than two dates are still
        # appended and keep the DataEntry default dates - confirm intended.
        main_dates.append(entry)

        # The source PDF cannot be trusted to spell "annual convocation"
        # consistently, so stop on any two-word label that contains either
        # word; this just reduces the number of ways the stop check can fail.
        annual_convocation = str(date['1']).strip().lower().split(" ")
        if len(annual_convocation) == 2 and (
            "annual" in annual_convocation or "convocation" in annual_convocation
        ):
            break

    return main_dates
| 191 | + |
0 commit comments