|
| 1 | +import os |
| 2 | +import json |
1 | 3 | import logging |
2 | 4 | from env import ROLL_NUMBER |
3 | 5 | from datetime import datetime |
|
6 | 8 | from endpoints import TPSTUDENT_URL, COMPANIES_URL |
7 | 9 |
|
8 | 10 |
|
# Absolute path of the JSON file that holds the last stored list of companies,
# located next to this module. abspath() guards against a __file__ without a
# directory component, where dirname() is "" and the old f-string produced
# "/companies.json" (the filesystem root); join() uses the OS separator.
COMPANIES_FILE = os.path.join(
    os.path.dirname(os.path.abspath(__file__)), "companies.json"
)
| 12 | + |
| 13 | + |
9 | 14 | def filter(companies, filter): |
10 | 15 | print('[FILTERING COMPANY UPDATES]', flush=True) |
11 | 16 |
|
@@ -44,7 +49,7 @@ def fetch(session, headers, ssoToken): |
44 | 49 | xml_encoded = xml_string.encode("utf-8") |
45 | 50 | root = ET.fromstring(xml_encoded) |
46 | 51 |
|
47 | | - companies = [] |
| 52 | + fetched_companies = [] |
48 | 53 | for row in root.findall("row"): |
49 | 54 | jd_args = row.find("cell[4]").text.split("'")[5].split('"') |
50 | 55 | jnf_id, com_id, year = jd_args[1], jd_args[3], jd_args[5] |
@@ -75,9 +80,52 @@ def fetch(session, headers, ssoToken): |
75 | 80 | "Interview_Date": row.find("cell[12]").text.strip() if row.find("cell[12]").text.strip() else None, |
76 | 81 | } |
77 | 82 |
|
78 | | - companies.append(company_info) |
| 83 | + fetched_companies.append(company_info) |
| 84 | + |
| 85 | + stored_companies = get_list() |
| 86 | + new_companies, modified_companies = get_new_and_modified_companies(fetched_companies, stored_companies) |
| 87 | + |
| 88 | + store_list(fetched_companies) |
| 89 | + |
| 90 | + return fetched_companies, new_companies, modified_companies |
| 91 | + |
| 92 | + |
def get_new_and_modified_companies(fetched, stored, unique_key="Job_Description"):
    """Split freshly fetched entries into new ones and modified ones.

    Entries are matched between ``fetched`` and ``stored`` by the value of
    ``unique_key``. If several entries share the same key value, the last
    one in each input wins.

    Args:
        fetched: Iterable of dicts from the latest fetch. Every entry must
            contain ``unique_key`` (a missing key raises ``KeyError``).
        stored: Iterable of previously stored entries. Items that are not
            dicts or that lack ``unique_key`` are ignored instead of raising,
            because this data is read back from a file on disk.
        unique_key: Name of the field that identifies an entry.

    Returns:
        A ``(new_entries, updated_entries)`` tuple of lists. ``new_entries``
        holds fetched entries whose key is absent from ``stored``;
        ``updated_entries`` holds fetched entries whose key is present but
        where at least one fetched field differs from the stored value.
    """
    # Index both sides by the unique key for O(1) lookups.
    stored_by_key = {
        entry[unique_key]: entry
        for entry in stored
        if isinstance(entry, dict) and unique_key in entry
    }
    fetched_by_key = {entry[unique_key]: entry for entry in fetched}

    new_entries = []
    updated_entries = []

    for key, fetched_entry in fetched_by_key.items():
        stored_entry = stored_by_key.get(key)
        if stored_entry is None:
            new_entries.append(fetched_entry)
        elif any(
            value != stored_entry.get(field)
            for field, value in fetched_entry.items()
        ):
            # Only fields present in the fetched entry are compared; a field
            # missing from the stored entry is treated as None.
            updated_entries.append(fetched_entry)

    return new_entries, updated_entries
| 112 | + |
| 113 | + |
def store_list(companies):
    """Write ``companies`` to COMPANIES_FILE as JSON indented by two spaces.

    The data is serialized before any file is opened, then written to a
    sibling ``.tmp`` file that replaces COMPANIES_FILE in one step. A value
    that cannot be serialized, or a failure part-way through the write,
    therefore leaves the previously stored file intact rather than truncated.

    Args:
        companies: JSON-serializable object (a list of company dicts).

    Raises:
        TypeError, ValueError: If ``companies`` cannot be serialized; nothing
            is written in that case.
        OSError: If the temporary file cannot be written or renamed.
    """
    # Serialize first so an encoding error cannot clobber the existing file.
    payload = json.dumps(companies, indent=2)

    tmp_path = f"{COMPANIES_FILE}.tmp"
    with open(tmp_path, "w") as json_file:
        json_file.write(payload)

    # Swap the finished file into place; a leftover .tmp from a failed run is
    # simply overwritten by the next call.
    os.replace(tmp_path, COMPANIES_FILE)
| 117 | + |
79 | 118 |
|
80 | | - return companies |
def get_list():
    """Load the stored list of companies from COMPANIES_FILE.

    Returns:
        The list read from the file. If the file does not exist, cannot be
        decoded, is not valid JSON, or holds JSON that is not an array, the
        file is reset to an empty list via ``store_list([])`` and ``[]`` is
        returned.
    """
    try:
        with open(COMPANIES_FILE, "r") as json_file:
            companies = json.load(json_file)
    except FileNotFoundError:
        # Nothing stored yet (e.g. a first run); not worth a warning.
        reason = None
    except (json.JSONDecodeError, UnicodeDecodeError) as exc:
        reason = f"invalid JSON ({exc})"
    else:
        if isinstance(companies, list):
            return companies
        # Valid JSON of the wrong shape would break callers that iterate
        # over the result expecting company dicts.
        reason = f"expected a JSON array, found {type(companies).__name__}"

    if reason is not None:
        # Make the data loss visible instead of discarding the file silently.
        logging.warning("Resetting %s: %s", COMPANIES_FILE, reason)

    store_list([])
    return []
81 | 129 |
|
82 | 130 |
|
83 | 131 | # Downloads pdf content in bytes format |
|
0 commit comments