# helper.py
import httpx
from bs4 import BeautifulSoup
import json
import asyncio
from logger import getLogger
from customTypes import Function

logger = getLogger(__name__)


def extractProblemsFromResponse(r: httpx.Response):
    """
    Extracts problem urls from a httpx.Response object

    Returns:
        list[str]: List of problem url paths (starting with "/p/")
    """
    doc = BeautifulSoup(r.content, "lxml")
    # href=True skips anchors without an href, which would otherwise
    # make .startswith() fail on None
    links = doc.findAll("a", href=True)
    return [a.get("href") for a in links if a.get("href").startswith("/p/")]


def findSolveRatio(r: httpx.Response):
    """
    Finds the solve ratio of a problem from a httpx.Response object

    Returns:
        list: [solveRatio (int, percentage), problemSlug (str)]
    """
    doc = BeautifulSoup(r.content, "lxml")
    # The last panel__body holds the stats; the first inner div is a header
    b = doc.findAll("div", class_="panel__body")[-1].findAll("div")[1:]
    c = [i.span.text for i in b]
    url = str(r.url).split("/")[-1]  # problem slug is the last url segment
    return [int(c[0][:-1]), url]  # strip the trailing "%" before casting


def findLeaderboard(r: httpx.Response):
    """
    Finds the leaderboard of a problem from a httpx.Response object

    Returns:
        list: [solutionPercentage, problemSlug, fastest, lightest, shortest]
    """
    doc = BeautifulSoup(r.content, "lxml")
    try:
        b = doc.findAll("div", class_="panel__body")[-1].findAll("div")[1:]
    except IndexError:
        # An unsolved problem has no stats panel to scrape
        logger.error(f"Unsolved Problem {r.url}")
        raise ValueError(f"Unsolved Problem {r.url}")
    c = [i.span.text for i in b]
    url = str(r.url).split("/")[-1]
    # Unpacking validates that the panel yields exactly five entries
    solution, earliest, fastest, lightest, shortest = c
    c[1] = url  # replace the "earliest" entry with the problem slug
    return c


async def makeBulkRequests(
    urls: list[str], req: Function, ses: httpx.AsyncClient, diff: int = 100
):
    """
    Makes bulk requests to a list of urls, in batches of `diff`

    Args:
        urls (list[str]): List of urls to make requests to
        req (Function): Async callable taking (url, session), e.g. a wrapper
            around httpx.AsyncClient.get
        ses (httpx.AsyncClient): httpx.AsyncClient to reuse across requests
        diff (int): Batch size (number of concurrent requests)

    Returns:
        list[httpx.Response]: List of httpx.Response objects
    """
    totalLen = len(urls)
    logger.info(f"Making bulk requests to {totalLen} urls")
    problemResponses = []
    while urls:
        batch = await asyncio.gather(
            *[req(i, ses) for i in urls[:diff]], return_exceptions=True
        )
        # gather() may return exceptions, so only inspect real responses
        if any(
            isinstance(r, httpx.Response) and r.status_code == 429 for r in batch
        ):
            logger.warning("Rate limited")
            exit(1)
        problemResponses += batch
        urls = urls[diff:]
        length = len(problemResponses)
        logger.info(f"Fetched {length}/{totalLen} requests")
        # Pause every 200 responses to stay under the server's rate limit
        if length % 200 == 0:
            logger.info("Sleeping for 1 minute")
            for i in range(60):
                logger.info(f"{60-i} seconds left")
                await asyncio.sleep(1)
    return problemResponses
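

# A minimal sketch of a `req` callable compatible with makeBulkRequests,
# matching the (url, session) call shape used in the gather() call above.
# `exampleGet` is an illustrative name, not part of the original module.
async def exampleGet(url: str, ses: httpx.AsyncClient) -> httpx.Response:
    return await ses.get(url)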


def dumpData(data: dict, path: str):
    """
    Dumps data to a json file

    Args:
        data (dict): Data to dump
        path (str): Path to dump to
    """
    logger.info(f"Dumping data to {path}")
    with open(path, "w", encoding="utf8") as f:
        json.dump(data, f, ensure_ascii=False)
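

# Hedged usage sketch showing how the helpers compose: fetch a listing page,
# collect problem urls, scrape solve ratios in bulk, and dump the result.
# "https://example.com" and the "/problems" path are placeholders, not the
# real target site; the actual entry point is not part of this module.
async def _demo():
    async with httpx.AsyncClient() as ses:
        listing = await ses.get("https://example.com/problems")
        paths = extractProblemsFromResponse(listing)
        responses = await makeBulkRequests(
            [f"https://example.com{p}" for p in paths], exampleGet, ses
        )
        # Skip entries where gather() returned an exception instead of a response
        ratios = [
            findSolveRatio(r) for r in responses if isinstance(r, httpx.Response)
        ]
        dumpData({slug: ratio for ratio, slug in ratios}, "solveRatios.json")


if __name__ == "__main__":
    asyncio.run(_demo())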