Skip to content

Commit 0108f2e

Browse files
Expand NUS Kattis functionalities
1 parent 677a158 commit 0108f2e

File tree

4 files changed

+168
-13
lines changed

4 files changed

+168
-13
lines changed

README.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,15 @@ kt.ranklist(university='National University of Singapore') # university leaderb
7373
kt.ranklist(university='nus.edu.sg') # use university domain instead
7474
```
7575

76+
### NUS-specific
77+
78+
```py
79+
kt.courses() # current and recently ended courses
80+
kt.offerings('CS3233') # course offerings
81+
kt.assignments('CS3233_S2_AY2223') # course assignments (course ID is guessed automatically)
82+
kt.assignments('CS3233_S2_AY2223', 'CS3233') # course assignments
83+
```
84+
7685
### Convert to DataFrame
7786

7887
As simple as this!

autokattis/api/__init__.py

Lines changed: 136 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -18,20 +18,42 @@
1818
from ..utils import guess_id
1919

2020
warnings.warn = lambda *args, **kwargs: None # suppress warnings
21+
def truncate(text):
    '''
    Collapses every run of two or more consecutive spaces in the text into a single space.
    Only the space character is affected; newlines and tabs are kept as they are,
        so callers can still split the result by line.
    '''

    return re.sub(r' {2,}', ' ', text)
2122

2223
class Kattis(requests.Session):
2324

2425
BASE_URL = 'https://open.kattis.com'
2526
MAX_WORKERS = 6
2627

28+
def new_get(self, *args, **kwargs):
    '''
    Sends a GET request through this session, retrying when the request fails.
    All arguments are forwarded unchanged to `self.get`.

    The request is attempted at most 5 times. If every attempt fails, the error of the
        last attempt is raised so the caller sees the real cause.
    Only `Exception` subclasses trigger a retry, so Ctrl-C (KeyboardInterrupt) and
        SystemExit still stop the program immediately.
    '''

    max_attempts = 5
    for attempt in range(1, max_attempts + 1):
        try:
            return self.get(*args, **kwargs)
        except Exception:
            if attempt == max_attempts:
                raise
33+
34+
def new_post(self, *args, **kwargs):
    '''
    Sends a POST request through this session, retrying when the request fails.
    All arguments are forwarded unchanged to `self.post`.

    The request is attempted at most 5 times. If every attempt fails, the error of the
        last attempt is raised so the caller sees the real cause.
    Only `Exception` subclasses trigger a retry, so Ctrl-C (KeyboardInterrupt) and
        SystemExit still stop the program immediately.
    '''

    max_attempts = 5
    for attempt in range(1, max_attempts + 1):
        try:
            return self.post(*args, **kwargs)
        except Exception:
            if attempt == max_attempts:
                raise
39+
2740
class Result(list):
    '''
    A list of records that can also be converted to a pandas DataFrame.
    '''

    def __init__(self, data):
        super().__init__(data)

    def to_df(self):
        '''
        Returns a DataFrame built from the current contents of this list.
        Reading from the list itself (instead of the constructor argument) keeps the
            DataFrame consistent with later sorting/mutation of the result, and also
            works when the result was built from a one-shot iterable such as a generator.
        '''

        return pd.DataFrame(list(self))
31-
44+
45+
def get_base_url(self):
46+
return self.BASE_URL
47+
3248
def set_base_url(self, url):
3349
self.BASE_URL = url
3450

51+
def get_homepage(self):
52+
return self.homepage
53+
54+
def set_homepage(self, hp):
55+
self.homepage = hp
56+
3557
def __init__(self, user, password=None):
3658
'''
3759
A local Kattis session.
@@ -43,7 +65,7 @@ def __init__(self, user, password=None):
4365
self.user, self.password = user, password
4466

4567
# Get CSRF token
46-
response = self.get(f'{self.BASE_URL}/login/email')
68+
response = self.new_get(f'{self.BASE_URL}/login/email')
4769
regex_result = re.findall(r'value="(\d+)"', response.text)
4870
assert len(regex_result) == 1, f'Regex found several possible CSRF tokens, {regex_result}'
4971
self.csrf_token = regex_result[0]
@@ -54,9 +76,8 @@ def __init__(self, user, password=None):
5476
'user': self.user,
5577
'password': self.password
5678
}
57-
response = self.post(f'{self.BASE_URL}/login/email', data=data)
79+
response = self.new_post(f'{self.BASE_URL}/login/email', data=data)
5880
assert response.url.startswith(self.BASE_URL), 'Cannot login to Kattis'
59-
print('Logged in to Kattis!', flush=True)
6081

6182
self.homepage = bs(response.content, features='lxml')
6283
names = []
@@ -67,10 +88,12 @@ def __init__(self, user, password=None):
6788
if len(paths) > 2 and paths[1] == 'users':
6889
names.append(paths[2])
6990
ctr = Counter(names)
91+
assert ctr, 'There are issues when logging in to Kattis, please check your username again'
7092
max_freq = max(ctr.values())
7193
candidate_usernames = [name for name in ctr if ctr[name] == max_freq]
72-
print(f'Candidate username(s): {candidate_usernames}')
94+
print(f'Candidate username(s): {candidate_usernames}', flush=True)
7395
self.user = candidate_usernames[0]
96+
print('Successfully logged in to Kattis!', flush=True)
7497

7598
@lru_cache
7699
def problems(self, show_solved=True, show_partial=True, show_tried=False, show_untried=False):
@@ -98,7 +121,7 @@ def problems(self, show_solved=True, show_partial=True, show_tried=False, show_u
98121
has_content = False
99122
futures.clear()
100123
for _ in range(self.MAX_WORKERS):
101-
futures.append(executor.submit(self.get, f'{self.BASE_URL}/problems', params=params.copy()))
124+
futures.append(executor.submit(self.new_get, f'{self.BASE_URL}/problems', params=params.copy()))
102125
params['page'] += 1
103126
for f in as_completed(futures):
104127
response = f.result()
@@ -116,7 +139,7 @@ def problems(self, show_solved=True, show_partial=True, show_tried=False, show_u
116139
link = f"{self.BASE_URL}{columns[0].find('a').get('href')}"
117140
name = columns[0].text
118141
fastest = float(columns[2].text.replace('--', 'inf'))
119-
shortest = int(columns[3].text.replace('--', '-1'))
142+
shortest = int(float(columns[3].text.replace('--', '-1')))
120143
total = int(columns[4].text)
121144
acc = int(columns[5].text)
122145
try:
@@ -344,7 +367,7 @@ def achievements(self, verbose=False):
344367
link = f"{self.BASE_URL}{columns[0].find('a').get('href')}"
345368
name = columns[0].text
346369
runtime = float(columns[1].text.replace('--', 'inf'))
347-
length = int(columns[2].text.replace('--', '-1'))
370+
length = int(float(columns[2].text.replace('--', '-1')))
348371
if len(columns) == 3:
349372
if not verbose: continue
350373
achievement = ''
@@ -461,7 +484,7 @@ def suggest(self):
461484
Returns a JSON-like structure containing the suggested problems points and its difficulty.
462485
'''
463486

464-
soup = self.homepage
487+
soup = self.get_homepage()
465488
try:
466489
table = soup.find_all('table', class_='table2 report_grid-problems_table')[0]
467490
except:
@@ -494,7 +517,7 @@ def ranklist(self, country=None, university=None):
494517
assert country == None or university == None, 'Both of country and university cannot be given at the same time!'
495518

496519
if country == university == None:
497-
soup = self.homepage
520+
soup = self.get_homepage()
498521
try:
499522
table = soup.find_all('table', class_='table2 report_grid-problems_table')[1]
500523
except:
@@ -710,3 +733,106 @@ def __init__(self, user, password=None):
710733
print('Logging in to NUS Kattis...', flush=True)
711734
self.set_base_url('https://nus.kattis.com')
712735
super().__init__(user, password)
736+
response = self.get(self.get_base_url())
737+
self.set_homepage(bs(response.content, features='lxml'))
738+
739+
@lru_cache
def courses(self):
    '''
    Lists down only the current courses offered and the courses with recently ended offerings in NUS Kattis.
    It does not list all existing courses in NUS Kattis.

    Every row of every `table2` table on the homepage is scanned; the first cell of a row
        provides the course name and the link to the course page.
    Returns a JSON-like structure with `name`, `url` and `course_id`, sorted by `course_id`.
    '''

    base_url = self.get_base_url()
    data = []
    for table in self.get_homepage().find_all('table', class_='table2'):
        for row in table.find_all('tr'):
            columns = row.find_all('td')
            if not columns:
                continue # rows without any <td> cell carry no course data
            anchor = columns[0].find('a')
            href = anchor.get('href') if anchor is not None else None
            if href is None:
                continue # no link in the first cell, so there is no course to point to
            data.append({
                'name': truncate(columns[0].text.strip()),
                'url': base_url + href,
                'course_id': href.split('/')[-1]
            })
    return self.Result(sorted(data, key=lambda r: r['course_id']))
763+
764+
@lru_cache
def offerings(self, course_id):
    '''
    Lists down all offerings within a specific NUS Kattis course.

    Returns a JSON-like structure with `name`, `end_date` and `link`,
        sorted by `end_date` from the latest to the earliest.
    Rows that do not look like an offering (not exactly two cells, no link in the first
        cell, or an end date cell with fewer than two words) are skipped.
    '''

    response = self.get(f'{self.get_base_url()}/courses/{course_id}')
    soup = bs(response.content, features='lxml')
    table = soup.find('table', class_='table2')
    if table is None:
        return self.Result([])

    # the markup may omit <tbody>, in which case the rows hang directly off the table
    body = table.tbody if table.tbody is not None else table
    data = []
    for row in body.find_all('tr'):
        columns = row.find_all('td')
        if len(columns) != 2:
            continue
        name_cell, date_cell = columns
        anchor = name_cell.find('a')
        href = anchor.get('href') if anchor is not None else None
        date_words = truncate(date_cell.text.strip()).split()
        if href is None or len(date_words) < 2:
            continue
        data.append({
            'name': truncate(name_cell.text.strip()),
            'end_date': date_words[1][:-1], # second word of the cell minus its trailing character
            'link': self.get_base_url() + href
        })
    return self.Result(sorted(data, key=lambda r: r['end_date'], reverse=True))
789+
790+
@lru_cache
def assignments(self, offering_id, course_id=None):
    '''
    Lists down all assignments within a specific NUS Kattis course offering.
    Problem IDs within a specific assignment are comma-separated, e.g. pid1,pid2,pid3

    If the course ID is not given, it is guessed by looking for a course (as listed by
        `courses()`) that has an offering named exactly as the given offering ID.
    Returns a JSON-like structure with `id`, `name`, `status`, `link` and `problems`.
    '''

    if course_id is None:
        # try to guess; iterate over the records directly so that courses without
        # any offering (empty results) do not break the lookup
        for course in self.courses():
            cid = course['course_id']
            if any(offering['name'] == offering_id for offering in self.offerings(cid)):
                course_id = cid
                break
        assert course_id is not None, 'Cannot guess course ID automatically, please provide one'
        print('Guessed course ID:', course_id, flush=True)

    base_url = self.get_base_url()
    response = self.get(f'{base_url}/courses/{course_id}/{offering_id}')
    soup = bs(response.content, features='lxml')
    data = []
    for div in soup.find_all('div', {'class': 'strip-row w-auto'}):
        h2 = div.find('h2')
        if h2 is None or h2.text.strip() != 'Assignments':
            continue
        current = None # assignment that the upcoming problem items belong to
        for asg in div.find_all('li'):
            if asg.find('span') is None:
                # an item without <span> starts a new assignment
                name, status = truncate(asg.text.strip()).split('\n')
                link = base_url + asg.find('a').get('href')
                current = {
                    'id': link.split('/')[-1],
                    'name': name,
                    'status': status.replace('(', '').replace(')', ''),
                    'link': link,
                    'problems': []
                }
                data.append(current)
            elif current is not None:
                # an item with <span> is a problem of the current assignment
                current['problems'].append(asg.text.strip())
    for entry in data:
        entry['problems'] = ','.join(entry['problems'])
    return self.Result(data)

setup.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
setup_args = dict(
77
name='autokattis',
8-
version='1.5',
8+
version='1.6',
99
description='Updated Kattis API wrapper',
1010
long_description_content_type="text/markdown",
1111
long_description=README,
@@ -18,9 +18,9 @@
1818
'requests',
1919
'beautifulsoup4',
2020
'lxml',
21-
'pandas==2.0.3',
21+
'pandas',
2222
'matplotlib',
23-
'seaborn==0.12.2',
23+
'seaborn',
2424
'thefuzz',
2525
'thefuzz[speedup]',
2626
],

test/nus.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,3 +104,23 @@
104104
ret = kt.ranklist(university='nus.edu.sg') # use university domain instead
105105
print(df:=ret.to_df())
106106
df.to_csv('test_nus_ranklist_university_domain.csv', index=False)
107+
108+
print('=== TEST COURSES ===')
109+
ret = kt.courses() # current and recently ended courses
110+
print(df:=ret.to_df())
111+
df.to_csv('test_nus_courses.csv', index=False)
112+
113+
print('=== TEST OFFERINGS (CS3233) ===')
114+
ret = kt.offerings('CS3233') # course offerings
115+
print(df:=ret.to_df())
116+
df.to_csv('test_nus_offerings.csv', index=False)
117+
118+
print('=== TEST ASSIGNMENTS (GUESS) ===')
119+
ret = kt.assignments('CS3233_S2_AY2223') # course assignments but course ID not provided
120+
print(df:=ret.to_df())
121+
df.to_csv('test_nus_assignments_guess.csv', index=False)
122+
123+
print('=== TEST ASSIGNMENTS (MANUAL) ===')
124+
ret = kt.assignments('CS3233_S2_AY2223', 'CS3233') # course assignments
125+
print(df:=ret.to_df())
126+
df.to_csv('test_nus_assignments_manual.csv', index=False)

0 commit comments

Comments
 (0)