22import json
33import logging
44from env import ROLL_NUMBER
5+ from utils import safe_text
56from datetime import datetime
67import xml .etree .ElementTree as ET
78from bs4 import BeautifulSoup as bs
@@ -18,13 +19,13 @@ def filter(companies, filter):
1819 if filter .upper () == "OPEN" :
1920 filter_func = currently_open
2021 elif filter .upper () == "OPEN_N" :
21- filter_func = open_not_applied # important
22+ filter_func = open_not_applied
2223 elif filter .upper () == "APPLIED" :
2324 filter_func = applied
2425 elif filter .upper () == "APPLIED_Y" :
25- filter_func = applied_available # important
26+ filter_func = applied_available
2627 elif filter .upper () == "APPLIED_N" :
27- filter_func = applied_not_available # important
28+ filter_func = applied_not_available
2829
2930 filtered = []
3031 for company in companies :
@@ -54,7 +55,7 @@ def fetch(session, headers, ssoToken):
5455
5556 fetched_companies = []
5657 for row in root .findall ("row" ):
57- jd_args = row .find ("cell[4]" ). text .split ("'" )[5 ].split ('"' )
58+ jd_args = safe_text ( row .find ("cell[4]" )) .split ("'" )[5 ].split ('"' )
5859 jnf_id , com_id , year = jd_args [1 ], jd_args [3 ], jd_args [5 ]
5960
6061 # Links
@@ -67,24 +68,22 @@ def fetch(session, headers, ssoToken):
6768 form_additional_details = f"https://erp.iitkgp.ac.in/TrainingPlacementSSO/AdmFilePDF.htm?type=JNF&year={ year } &jnf_id={ jnf_id } &com_id={ com_id } "
6869
6970 company_info = {
70- "Name" : row .find ("cell[1]" ). text .split (">" )[1 ].split ("<" )[0 ].strip (),
71+ "Name" : safe_text ( row .find ("cell[1]" )) .split (">" )[1 ].split ("<" )[0 ].strip (),
7172 "Company_Details" : company_details ,
7273 "Company_Additional_Details" : company_additional_details ,
7374 "PPT" : ppt ,
74- "Role" : row .find ("cell[4]" ). text .split ("'" )[1 ].strip (),
75+ "Role" : safe_text ( row .find ("cell[4]" )) .split ("'" )[1 ].strip (),
7576 "Job_Description" : jd ,
7677 "Apply_Link_CV" : apply_link_cv ,
7778 "Additional_Job_Description" : additional_jd ,
7879 "CTC" : get_ctc_with_currency (session , headers , additional_jd ),
7980 "Form_Additional_Details" : form_additional_details ,
80- "Application_Status" : row . find ( "cell[9]" ). text . strip () if row .find ("cell[9]" ). text . strip () else "N" ,
81- "Start_Date" : row .find ("cell[10]" ). text . strip ( ),
82- "End_Date" : row .find ("cell[11]" ). text . strip ( ),
83- "Interview_Date" : row . find ( "cell[12]" ). text . strip () if row .find ("cell[12]" ). text . strip () else None ,
81+ "Application_Status" : safe_text ( row .find ("cell[9]" ), "N" ) ,
82+ "Start_Date" : safe_text ( row .find ("cell[10]" )),
83+ "End_Date" : safe_text ( row .find ("cell[11]" )),
84+ "Interview_Date" : safe_text ( row .find ("cell[12]" ), None ) ,
8485 }
85-
8686 fetched_companies .append (company_info )
87-
8887 stored_companies = get_list ()
8988 new_companies , modified_companies = get_new_and_modified_companies (fetched_companies , stored_companies )
9089
@@ -129,7 +128,7 @@ def get_list():
129128 try :
130129 with open (COMPANIES_FILE , "r" ) as json_file :
131130 return json .load (json_file )
132- except json .JSONDecodeError as _ :
131+ except json .JSONDecodeError :
133132 store_list ([])
134133 return []
135134 except FileNotFoundError :
@@ -138,13 +137,12 @@ def get_list():
138137
139138
140139# Downloads pdf content in bytes format
141- ## Not used currently
140+ # Not used currently
def parse_link(session, link):
    """Download the resource at ``link`` and return its content as bytes.

    Args:
        session: Object exposing ``get(url, stream=True)`` whose result
            provides ``iter_content(chunk_size)`` -- presumably a
            ``requests.Session``; confirm against the caller.
        link: URL of the resource to download.

    Returns:
        All streamed chunks concatenated into one ``bytes`` object
        (``b""`` when the stream yields nothing).
    """
    stream = session.get(link, stream=True)
    # Join once at the end: ``attachment += chunk`` re-copies everything
    # accumulated so far on each iteration, which is quadratic for big files.
    return b"".join(stream.iter_content(4096))
149147
150148
@@ -196,7 +194,6 @@ def compare_deadline_lt(company, deadline_key):
196194
197195def parse_date (company , date_key ):
198196 date_format = "%d-%m-%Y %H:%M"
199-
200197 date = None
201198 if company .get (date_key ):
202199 try :
@@ -206,4 +203,3 @@ def parse_date(company, date_key):
206203 date = None
207204
208205 return date
209-
0 commit comments