-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrunner.py
413 lines (383 loc) · 13.9 KB
/
runner.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
import constants
import os
import pandas as pd
import subprocess
import sys
import validators
from datetime import *
from rich.console import Console
from scripts.models import entry, status
from scripts.services.auto import AutoService
from scripts.utils.process import SubprocessService
from scripts.utils.gum import Gum
from scripts.utils.helpers import (
get_terminal_width,
file_preview,
load_urls_from_file,
print_tabbed_doc_string,
)
# Module-level switch read by bkp(); it starts disabled and is only turned on
# by the __main__ guard when the backup flag is given on the command line.
# (A `global` statement is unnecessary at module scope, so a plain assignment
# is used.)
bkp_flag = False
def main():
    """Entry point: make sure the source CSV exists, then run one menu action.

    If ``constants.SOURCE_CSV`` is missing, an empty CSV containing only the
    ``constants.COLUMN_NAMES`` header is created; if that fails the error is
    printed and the function returns.  The user then picks an action through
    ``Gum.choose``; the chosen label is translated with
    ``constants.CHOICE_MAP`` and dispatched via ``menu_choice``.

    Raises:
        Exception: anything raised while choosing or running the action is
            printed and then re-raised.
    """
    # Set pandas config to print columns with max width
    pd.set_option("display.max_colwidth", constants.MAX_COL_WIDTH)

    # Check if source CSV file exists
    if os.path.isfile(constants.SOURCE_CSV):
        print(
            f"{constants.OKGREEN}Source file job_applications.csv found{constants.ENDC}.\nWhat do you want to do?"
        )
    else:
        print(
            "Source CSV file does not exist. Creating new file named job_applications.csv..."
        )
        try:
            empty_table = pd.DataFrame(columns=constants.COLUMN_NAMES)
            empty_table.to_csv(constants.SOURCE_CSV, index=False)
        except Exception as e:
            print(e)
            return

    # Ask user what they want to do
    try:
        # The AUTO entry is left out of the menu on "Linux arm64".
        # NOTE(review): Linux on 64-bit ARM commonly reports its machine as
        # "aarch64" rather than "arm64" - confirm this comparison matches the
        # platform it is meant to exclude.
        platform_tag = f"{os.uname().sysname} {os.uname().machine}"
        opts = [constants.VIEW, constants.ADD, constants.EDIT, constants.PRT]
        if platform_tag != "Linux arm64":
            opts.append(constants.AUTO)
        opts.append(constants.QUIT)

        choice = Gum.choose(list(opts))
        handler = menu_choice(constants.CHOICE_MAP[choice])
        handler()
    except Exception as e:
        print(e)
        raise
# Dispatch table for menu choices
def menu_choice(choice):
    """Return the handler function registered for the menu key ``choice``.

    Args:
        choice: one of "view", "add", "edit", "quit", "bkp", "auto", "print".

    Returns:
        The matching zero-argument function.  For an unknown key, a callable
        that returns the string "Invalid choice" is returned instead.
    """
    handlers = {
        "view": view,
        "add": add,
        "edit": edit,
        "quit": quit,
        "bkp": bkp,
        "auto": auto,
        "print": print_to_file,
    }
    try:
        return handlers[choice]
    except KeyError:
        return lambda: "Invalid choice"
def view():
    """Show the stored applications, optionally sorted by one column.

    Returns early with a message when ``constants.SOURCE_CSV`` does not exist.
    Otherwise the CSV is loaded, the user is asked whether to sort and (if so)
    by which of ``constants.COLUMN_NAMES``, and the resulting dataframe is
    handed to ``file_preview``.
    """
    if not os.path.isfile(constants.SOURCE_CSV):
        print("Source CSV file does not exist. Please add a job entry first.")
        return

    applications = pd.read_csv(constants.SOURCE_CSV)

    # Ask user if they want to sort by column
    print("Do you want to sort output by a column?")
    print(f"Possible columns: {list(constants.COLUMN_NAMES)}")
    wants_sort = Gum.choose(list(constants.NY)) == "YES"

    if wants_sort:
        print(
            f"{constants.OKGREEN}Choose option to sort by a specific column, if any. Otherwise, select `default`.{constants.ENDC}"
        )
        sort_column = Gum.choose(list(constants.COLUMN_NAMES))
        applications = applications.sort_values(by=[sort_column])

    # Print dataframe
    file_preview(applications)
def add():
    """Interactively collect one job application and append it to the CSV.

    Prompts through Gum for company, position, date applied, status, portal
    link and notes, builds an ``entry.Entry`` from the answers, and appends
    the dataframe returned by ``Entry.create_dataframe()`` to
    ``constants.SOURCE_CSV`` (append mode, no header row, no index column).

    The portal-link prompt repeats until ``validators.url`` accepts the input;
    entering ``Q`` there exits the program through ``quit()``.
    """
    print("Adding new job application...")
    # Ask user for company name
    company_name = Gum.input(placeholder=constants.INPUT_COMPANY_NAME)
    # Ask user for position
    position = Gum.input(placeholder=constants.INPUT_POSITION)

    # Ask user for date applied
    print("Choose date applied:")
    date_choice = Gum.choose([constants.DATE_NOW, constants.DATE_CUSTOM])
    if date_choice == constants.DATE_NOW:
        # NOTE(review): today's date is handed to Entry as a `date` object,
        # while the custom branch hands over the raw input string - confirm
        # Entry normalises both to the same on-disk format.
        date_applied = date.today()
    else:
        # Any answer other than DATE_NOW asks for a manual date, so
        # `date_applied` is always bound before Entry is built.
        date_applied = Gum.input(placeholder=constants.INPUT_DATE_APPLIED)

    # Ask user for status
    print("Choose current status:")
    current_status = Gum.choose(
        [
            constants.STATUS_INIT,
            constants.STATUS_ASSESSMENT,
            constants.STATUS_INTERVIEW,
            constants.STATUS_OFFER,
            constants.STATUS_REJECTED,
        ]
    )

    # Ask user for portal link until a valid URL (or "Q") is entered.
    # The prompt text is passed as the `placeholder` keyword, matching every
    # other Gum.input call in this file.
    portal_prompt = constants.INPUT_PORTAL_LINK + f". {constants.INPUT_QUIT}"
    while True:
        portal_link = Gum.input(placeholder=portal_prompt)
        if portal_link == "Q":
            quit()
        if validators.url(portal_link):
            break
        print("Invalid URL. Please try again.")

    # Ask user for notes
    notes = Gum.input(placeholder=constants.INPUT_NOTES)

    # Create entry
    new_entry = entry.Entry(
        company=company_name,
        position=position,
        date_applied=date_applied,
        status=status.Status(current_status),
        link=portal_link,
        notes=notes,
    )
    df = new_entry.create_dataframe()
    # Append dataframe to CSV
    df.to_csv(constants.SOURCE_CSV, mode="a", header=False, index=False)
    print("Job entry added!")
def edit():
    """Let the user pick an existing entry, then update or delete it.

    Flow:
      1. The CSV is rendered with ``column``, piped through ``less`` and the
         command in ``constants.GUM_FILTER``; the first field of the chosen
         line (fields separated by two or more whitespace characters, split by
         ``awk``) is taken as the company name.
      2. All rows whose first column equals that name are selected.  When
         several match, the user chooses one via ``constants.GUM_CHOOSE`` and
         the rows with the chosen "Position" are kept.
      3. The user chooses "Update" or "Delete", which delegates to
         ``update`` / ``delete`` with the selected rows, the full dataframe
         and the index label of the selected row.

    Returns early (with a message) when the CSV is missing, when the user
    declines to retry after an invalid selection, or when nothing matches.
    """
    # Make sure job_applications.csv exists
    if not os.path.isfile(constants.SOURCE_CSV):
        print("Source CSV file does not exist. Please add a job entry first.")
        return

    while True:
        # Ask user to choose job entry: column | less | gum filter
        column = subprocess.Popen(
            ["column", "-s,", "-t", f"{constants.SOURCE_CSV}"],
            stdout=subprocess.PIPE,
            shell=False,
        )
        less = subprocess.Popen(
            ["less", "-#2", "-N", "-S"], stdin=column.stdout, stdout=subprocess.PIPE
        )
        column.stdout.close()
        gum = subprocess.Popen(
            [*constants.GUM_FILTER], stdin=less.stdout, stdout=subprocess.PIPE
        )
        less.stdout.close()
        output = gum.communicate()[0].decode("utf-8")

        # Extract the first field of the selected line: echo | awk
        echo_output = subprocess.Popen(["echo", f"{output}"], stdout=subprocess.PIPE)
        awk_check = subprocess.Popen(
            ["awk", "-F", "[[:space:]][[:space:]]+", "{print $1}"],
            stdin=echo_output.stdout,
            stdout=subprocess.PIPE,
        )
        echo_output.stdout.close()
        check_out = awk_check.communicate()[0].decode("utf-8").strip()

        if check_out == "Company":
            print(f"{constants.FAIL}You cannot edit a column header!{constants.ENDC}")
        elif check_out == constants.DEFAULT_COLUMN_CHOOSE:
            print(f"{constants.FAIL}No entry was chosen!{constants.ENDC}")
            continue
        elif not check_out:
            # A decoded, stripped string is never None; an empty selection
            # shows up as "" and is handled here instead of crashing on the
            # lookup below.
            print(f"{constants.FAIL}No entry was chosen!{constants.ENDC}")
        else:
            break

        print("Do you want to try again?")
        retry_choice = Gum.choose([*constants.YN])
        if retry_choice != "YES":
            return

    # Get rows that match check_out
    original_df = pd.read_csv(constants.SOURCE_CSV)
    company_mask = original_df[constants.COLUMN_NAMES[0]] == check_out
    df = original_df.loc[company_mask].copy()

    # Bail out before touching the index: an empty selection has no first row.
    if df.empty:
        print(f"{constants.FAIL}No entries found!{constants.ENDC}")
        return

    # Filtering keeps the original index labels, so this label addresses the
    # same row in original_df.
    original_index = df.index[0]
    print(
        f"{constants.OKGREEN}Here are the entries that match your search:{constants.ENDC}"
    )

    # If there are multiple rows, ask user to choose one
    if len(df.index) > 1:
        print(df)
        print(
            f"{constants.WARNING}Multiple entries found. Please choose one:{constants.ENDC}"
        )
        terminal_width = get_terminal_width()
        if terminal_width is None:
            terminal_width = 80

        # One tab-separated, width-truncated line per candidate row.  Cells
        # are stringified first because CSV cells may load as numbers or NaN.
        dup_companies = [
            "\t".join(map(str, row[:4]))[:terminal_width]
            for row in df.values.tolist()
        ]
        company_row = subprocess.Popen(
            [*constants.GUM_CHOOSE] + dup_companies, stdout=subprocess.PIPE, shell=False
        )
        position = subprocess.Popen(
            ["awk", "-F", "\t", "{print $2}"],
            stdin=company_row.stdout,
            stdout=subprocess.PIPE,
        )
        company_row.stdout.close()
        position_output = position.communicate()[0].decode("utf-8").strip()

        selected = df.loc[df["Position"] == position_output]
        if selected.empty:
            # Nothing chosen, or the position text did not survive truncation.
            print(f"{constants.FAIL}No entries found!{constants.ENDC}")
            return
        df = selected.copy()
        # The label of the chosen row is already the label in original_df;
        # no offset arithmetic is needed.
        original_index = df.index[0]

    old_df = df.copy()

    # Ask user if they want to update or delete the entry
    print("What do you want to do?")
    update_choice = Gum.choose(["Update", "Delete"])
    if update_choice == "Update":
        update(df, original_df, original_index, old_df)
    elif update_choice == "Delete":
        delete(df, original_df, original_index)
def delete(df, original_df, original_index):
    """Remove one row from the CSV after the user confirms.

    Args:
        df: the selected row(s); printed after a successful deletion.
        original_df: the full dataframe loaded from the CSV.
        original_index: index label of the row to drop from ``original_df``.
    """
    print("Confirm deletion?")
    answer = Gum.choose(list(constants.YN))
    if answer != "YES":
        print("Deletion not confirmed. Exiting...")
        return

    # Drop the row and rewrite the whole CSV without the index column.
    remaining = original_df.drop(original_index)
    remaining.to_csv(constants.SOURCE_CSV, index=False)
    print("Entry deleted!")
    print(df)
def update(df, original_df, original_index, old_df):
    """Change one field of the selected entry and save it after confirmation.

    Args:
        df: the selected row(s); a private copy is modified and its first row
            is written back.
        original_df: the full dataframe loaded from the CSV.
        original_index: index label in ``original_df`` of the row to replace.
        old_df: untouched copy of the selection, printed for comparison.

    Raises:
        ValueError: when a new "Date Applied" value does not match
            ``%m/%d/%Y``.
    """
    # Work on a private copy so column assignments never write through to a
    # slice of the caller's dataframe.
    df = df.copy()

    # Ask user if they want to update the status or any other column
    print("What do you want to update?")
    update_choice = Gum.choose(["Status", "Other"])
    if update_choice == "Status":
        print("Choose new status:")
        current_status = Gum.choose(
            [
                constants.STATUS_INIT,
                constants.STATUS_ASSESSMENT,
                constants.STATUS_INTERVIEW,
                constants.STATUS_OFFER,
                constants.STATUS_REJECTED,
            ]
        )
        df["Status"] = current_status
    elif update_choice == "Other":
        print("Choose column to update:")
        column_choice = Gum.choose([*constants.COLUMN_NAMES])

        # Ask user for new value.  The hint is chosen outside the f-string:
        # nesting braces inside the replacement field would build a set
        # literal and show its repr in the placeholder.
        if column_choice == "Date Applied":
            field_hint = constants.INPUT_DATE_APPLIED
        else:
            field_hint = column_choice
        new_value = Gum.input(placeholder=f"Input new {field_hint}")
        if new_value == "":
            print("No changes made.")
            return

        if column_choice == "Date Applied":
            # Validation only: raises ValueError on malformed input.  The
            # value is stored as the text the user typed.
            datetime.strptime(new_value, "%m/%d/%Y")
        df[column_choice] = new_value

    # Confirm changes
    print("Confirm changes?")
    confirm_choice = Gum.choose([*constants.YN])
    # The rest of this file compares Gum.choose(constants.YN) against "YES".
    if confirm_choice == "YES":
        # Update CSV
        original_df.loc[original_index] = df.iloc[0]
        original_df.to_csv(constants.SOURCE_CSV, index=False)
        print("Entry updated!")
    else:
        print("Changes not saved.")

    # Print old and new entries
    print(f"{constants.OKGREEN}Old entry:{constants.ENDC}")
    print(old_df)
    print(f"{constants.OKGREEN}New entry:{constants.ENDC}")
    print(df)
def bkp():
    """Backup placeholder; only allowed when the module-level ``bkp_flag`` is set.

    Exits the process with status 1 (after printing an error) when the flag
    is not set; otherwise just prints a placeholder message.
    """
    if bkp_flag:
        print("Placeholder backup function. TODO")
        return

    print(
        f"{constants.FAIL}Backup process flag was not passed. Invalid operation.{constants.ENDC}"
    )
    sys.exit(1)
def print_to_file():
    """Load the source CSV and pass it to ``file_preview`` with ``ptf_flag=True``.

    Returns early with a message when ``constants.SOURCE_CSV`` does not exist.
    """
    if not os.path.isfile(constants.SOURCE_CSV):
        print("Source CSV file does not exist. Please add a job entry first.")
        return

    applications = pd.read_csv(constants.SOURCE_CSV)
    print("Printing to file...")
    file_preview(applications, ptf_flag=True)
def auto():
    """Scrape one or more job-posting URLs and offer to save the results.

    The user enters a URL or a comma-separated list of URLs, ``M`` to load
    the URL string through ``load_urls_from_file()``, or ``Q`` to exit via
    ``quit()``.  Input is re-requested until every URL passes
    ``validators.url``.  The URLs are then handed to
    ``AutoService.batch_run``; when some fail, the user may retry just the
    failed ones.  All non-empty result frames are concatenated and passed to
    ``finish_auto_service``.  Nothing is passed on when no rows were scraped,
    or when an exception occurs during scraping (it is printed).
    """
    service = AutoService()  # Initialize AutoService

    # Ask user for job posting URL(s) until all of them validate
    while True:
        url = Gum.input(
            placeholder=f"{constants.INPUT_JOB_POSTING_URL} "
            + f"{constants.INPUT_QUIT} "
            + f"{constants.INPUT_MASS_ADD}"
        )
        if url == "Q":
            quit()
        if url == "M":
            url = load_urls_from_file()

        # str.replace returns a new string, so the result must be kept.
        url = url.replace(" ", "")
        urls = url.split(",")
        if all(urls) and all(validators.url(candidate) for candidate in urls):
            break
        print("Invalid URL found. Please try again.")

    print(f"{constants.OKGREEN}Cooking...{constants.ENDC}", end=" ")
    Console().print(":man_cook:")

    dfs = []
    while True:
        try:
            df, failed_urls = service.batch_run(urls)
            dfs.append(df)
            if failed_urls:
                print_tabbed_doc_string(
                    f"{constants.PROJECT_ROOT}/docs/shell/scrape_fail.txt"
                )
                print(f"Do you want to retry {len(failed_urls)} failed URL(s)?")
                retry_choice = Gum.choose([*constants.YN])
                if retry_choice == "YES":
                    urls = failed_urls
                    continue
            break
        except Exception as e:
            print(e)
            return

    # Decide on the combined result, not just the last batch: an empty retry
    # must not discard rows scraped by an earlier batch.
    scraped = [frame for frame in dfs if not frame.empty]
    if not scraped:
        return
    finish_auto_service(pd.concat(scraped, ignore_index=True))
def quit():
    """Print the exit message and terminate the process with status 0.

    Note: this module-level name shadows the interactive ``quit`` builtin.
    """
    farewell = f"{constants.OKGREEN}Exiting...{constants.ENDC}"
    print(farewell)
    sys.exit(0)
def finish_auto_service(df):
    """Show scraped entries and append them to the CSV if the user confirms.

    Args:
        df: dataframe of scraped entries; printed in full and, on
            confirmation, appended to ``constants.SOURCE_CSV`` without header
            or index column.
    """
    print("===== Scraper Results =====")
    job_count = len(df.index)
    print(f"{constants.OKGREEN}Scraped {job_count} job(s)!{constants.ENDC}")
    print(df)
    print("===== End of Results =====")

    print("Does this look correct? Confirming will write entry to file.")
    answer = Gum.choose(list(constants.YN))
    if answer != "YES":
        print(
            "Entry not written to file.\nIf you found errors in the generated entries, they can be manually edited after being committed to the CSV file."
        )
        return

    # Append to CSV
    df.to_csv(constants.SOURCE_CSV, mode="a", header=False, index=False)
    print("Entry written to file!")
if __name__ == "__main__":
    # Enable the backup path only when the first CLI argument is the backup
    # flag; slicing yields an empty list when no argument was given.
    if sys.argv[1:2] == [constants.BKP_FLAG]:
        bkp_flag = True
    main()