-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfetcher.py
More file actions
73 lines (57 loc) · 2.55 KB
/
fetcher.py
File metadata and controls
73 lines (57 loc) · 2.55 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
#%%## Importations #####
import warnings
warnings.filterwarnings("ignore") # Suppress UserWarnings
import json
import pandas as pd
from tqdm import tqdm
import streamlit as st
import streamlit.components.v1 as stc # Import stc for using HTML components directly
from funcs import Bcolors
from funcs import (sfe_list, reseautee_list, cnrs_list, inrae_list,
inria_list, ird_list, cirad_list, euraxess_list, ifremer_list,
specifics_list)
from funcs import concours_alert
#%%## Parameters #####
# keywords inputs (non-user-friendly)
# Filtering configuration; the script reads the keys "places_to_exclude",
# "wrong_job_types", "good_omen" and "bad_omen" from it below.
# encoding pinned to UTF-8: the keyword lists are matched against accented
# French text, so the platform-default encoding must not be trusted.
with open("keywords.json", "r", encoding="utf-8") as f:
    keywords = json.load(f)
# User input (typing-friendly)
# Cutoff date forwarded to the fetchers that support date filtering.
date_cutoff = input("Cutoff date for fetching offers? (format: YYYY-MM-DD) ")
#%%## Main #####
# Import all job offers from every configured source into one DataFrame.
functions = [sfe_list, cnrs_list, inrae_list, inria_list, ird_list, cirad_list, euraxess_list, ifremer_list, specifics_list]
names = ["SFE", "CNRS", "INRAE", "INRIA", "IRD", "CIRAD", "Euraxess", "IFREMER", "others"]
# These fetchers take no argument (their sources cannot be filtered by date);
# every other fetcher receives the user-supplied cutoff.
no_cutoff_sources = {"INRAE", "INRIA", "IRD", "others"}
# Collect each source's frame and concatenate once at the end instead of
# re-concatenating inside the loop (avoids quadratic copying).
frames = []
# total was hard-coded to 10 for 9 sources, so the progress bar never
# completed; derive it from the list instead.
for function, name in tqdm(zip(functions, names), leave=False, desc="Fetching offers", total=len(functions)):
    tqdm.write(f"Treating {name} offers...")
    if name in no_cutoff_sources:
        frames.append(function())
    else:
        frames.append(function(date_cutoff))
    tqdm.write(f"Table currently has {sum(len(frame) for frame in frames)} rows.\n")
df_offers = pd.concat(frames)
tqdm.write(f"{Bcolors.OKGREEN}Successfully imported all job offers{Bcolors.ENDC}.\n")
concours_alert()
print("\nRemoving offers based on custom keywords...")
# Remove a maximum of useless offers.
# na=False treats missing Location/Type/Title values as "no match", so NaN
# never leaks into a boolean mask (a NaN mask raises in pandas indexing).
# Drop offers located in excluded places
for city in keywords["places_to_exclude"]:
    mask = df_offers['Location'].str.contains(city, case=False, na=False)
    df_offers = df_offers[~mask]
# Drop offers with unwanted contract types
for tte in keywords["wrong_job_types"]:
    mask = df_offers['Type'].str.contains(tte, case=False, na=False)
    df_offers = df_offers[~mask]
# Keep only offers whose title matches at least one "good omen" keyword.
# NOTE(review): the seed pattern "é" also keeps any title containing an
# accented "é" (i.e. most French-language titles) regardless of keywords —
# preserved as-is, but confirm this is intentional.
mask = df_offers['Title'].str.contains("é", case=False, na=False)
for keyword in keywords["good_omen"]:
    # Boolean OR; the original used `+`, which propagates NaN and is
    # non-idiomatic for combining masks.
    mask = mask | df_offers['Title'].str.contains(keyword, case=False, na=False)
df_offers = df_offers[mask]
# Exclude offers whose title contains a "bad omen" keyword
for keyword in keywords["bad_omen"]:
    mask = df_offers['Title'].str.contains(keyword, case=False, na=False)
    df_offers = df_offers[~mask]
print(f"\nFinal dataframe contains {len(df_offers)} offers.")
# Save the remaining offers for downstream consumption
df_offers.to_csv("last_batch_of_job_offers.csv")