Skip to content

Commit 056356f

Browse files
Extend ranklist to support query for top 100
1 parent 0108f2e commit 056356f

File tree

5 files changed, with 146 additions and 88 deletions.

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ kt.suggest() # what's the next problem for me?
6767

6868
```py
6969
kt.ranklist() # people around you
70+
kt.ranklist(top_100=True) # show top 100
7071
kt.ranklist(country='Singapore') # country leaderboard
7172
kt.ranklist(country='SGP') # use alpha-3 code instead
7273
kt.ranklist(university='National University of Singapore') # university leaderboard

autokattis/api/__init__.py

Lines changed: 134 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -506,62 +506,25 @@ def suggest(self):
506506
return self.Result(data)
507507

508508
@lru_cache
509-
def ranklist(self, country=None, university=None):
509+
def ranklist(self, top_100=False, country=None, university=None):
510510
'''
511511
Retrieves the current ranklist.
512-
Country or university can be specified, but not both.
512+
Query for top 100 takes precedence over query for country or university.
513+
Otherwise, country or university can be specified, but not both.
513514
514515
Default: ranklist of people around you.
515516
'''
516517

517-
assert country == None or university == None, 'Both of country and university cannot be given at the same time!'
518-
519-
if country == university == None:
520-
soup = self.get_homepage()
521-
try:
522-
table = soup.find_all('table', class_='table2 report_grid-problems_table')[1]
523-
except:
524-
return self.Result([])
525-
if not table:
526-
return self.Result([])
527-
data = []
528-
for row in table.tbody.find_all('tr'):
529-
columns = row.find_all('td')
530-
rank, name, pts, *_ = [column.text.strip() for column in columns]
531-
rank = int(rank) if rank.isdigit() else None
532-
pts = float(re.findall(r'[\d\.]+', pts)[0])
533-
findall = columns[1].find_all('a')
534-
535-
new_data = {
536-
'rank': rank,
537-
'name': name,
538-
'points': pts,
539-
'country': None,
540-
'university': None
541-
}
542-
543-
for urlsplit, title in [(column.get('href').split('/'), column.get('title')) for column in findall]:
544-
assert sum(x in urlsplit for x in ['users', 'universities', 'countries']) == 1, 'Only one field should be present'
545-
if 'users' in urlsplit:
546-
new_data['username'] = urlsplit[-1] # guaranteed to exist
547-
elif 'universities' in urlsplit:
548-
new_data['university_code'] = urlsplit[-1]
549-
new_data['university'] = title
550-
elif 'countries' in urlsplit:
551-
new_data['country_code'] = urlsplit[-1]
552-
new_data['country'] = title
553-
data.append(new_data)
554-
elif country != None:
555-
country_code = guess_id(country, COUNTRIES)
556-
response = self.get(f'{self.BASE_URL}/countries/{country_code}')
518+
data = []
519+
if top_100:
520+
response = self.get(f'{self.BASE_URL}/ranklist')
557521
soup = bs(response.content, features='lxml')
558522
try:
559523
table = soup.find('table', class_='table2 report_grid-problems_table', id='top_users')
560524
except:
561525
return self.Result([])
562526
if not table:
563527
return self.Result([])
564-
data = []
565528
headers = [re.findall(r'[A-Za-z]+', h.text)[0] for h in table.find_all('th')]
566529
for row in table.tbody.find_all('tr'):
567530
columns = row.find_all('td')
@@ -593,59 +556,143 @@ def ranklist(self, country=None, university=None):
593556
'name': name,
594557
'username': username,
595558
'points': pts,
596-
'country_code': country_code,
597-
'country': COUNTRIES[country_code],
559+
'country_code': country_code if country else None,
560+
'country': country,
598561
'subdivision_code': subdivision_code if subdivision else None,
599562
'subdivision': subdivision if subdivision else None,
600563
'university_code': university_code if university else None,
601564
'university': university if university else None
602565
})
603566
else:
604-
university_code = guess_id(university, UNIVERSITIES)
605-
response = self.get(f'{self.BASE_URL}/universities/{university_code}')
606-
soup = bs(response.content, features='lxml')
607-
table = soup.find('table', class_='table2 report_grid-problems_table', id='top_users')
608-
if not table:
609-
return self.Result([])
610-
data = []
611-
headers = [re.findall(r'[A-Za-z]+', h.text)[0] for h in table.find_all('th')]
612-
for row in table.tbody.find_all('tr'):
613-
columns = row.find_all('td')
614-
columns_text = [column.text.strip() for column in columns]
615-
columns_url = [column.find_all('a') for column in columns]
567+
assert country == None or university == None, 'Both of country and university cannot be given at the same time!'
568+
569+
if country == university == None:
570+
soup = self.get_homepage()
571+
try:
572+
table = soup.find_all('table', class_='table2 report_grid-problems_table')[1]
573+
except:
574+
return self.Result([])
575+
if not table:
576+
return self.Result([])
577+
for row in table.tbody.find_all('tr'):
578+
columns = row.find_all('td')
579+
rank, name, pts, *_ = [column.text.strip() for column in columns]
580+
rank = int(rank) if rank.isdigit() else None
581+
pts = float(re.findall(r'[\d\.]+', pts)[0])
582+
findall = columns[1].find_all('a')
616583

617-
rank = int(columns_text[0])
618-
name = columns_text[1]
619-
pts = float(columns_text[-1])
620-
name_urls = columns_url[1]
621-
username = name_urls[0].get('href').split('/')[-1] # guaranteed to exist
584+
new_data = {
585+
'rank': rank,
586+
'name': name,
587+
'points': pts,
588+
'country': None,
589+
'university': None
590+
}
591+
592+
for urlsplit, title in [(column.get('href').split('/'), column.get('title')) for column in findall]:
593+
assert sum(x in urlsplit for x in ['users', 'universities', 'countries']) == 1, 'Only one field should be present'
594+
if 'users' in urlsplit:
595+
new_data['username'] = urlsplit[-1] # guaranteed to exist
596+
elif 'universities' in urlsplit:
597+
new_data['university_code'] = urlsplit[-1]
598+
new_data['university'] = title
599+
elif 'countries' in urlsplit:
600+
new_data['country_code'] = urlsplit[-1]
601+
new_data['country'] = title
602+
data.append(new_data)
603+
elif country != None:
604+
country_code = guess_id(country, COUNTRIES)
605+
response = self.get(f'{self.BASE_URL}/countries/{country_code}')
606+
soup = bs(response.content, features='lxml')
607+
try:
608+
table = soup.find('table', class_='table2 report_grid-problems_table', id='top_users')
609+
except:
610+
return self.Result([])
611+
if not table:
612+
return self.Result([])
613+
headers = [re.findall(r'[A-Za-z]+', h.text)[0] for h in table.find_all('th')]
614+
for row in table.tbody.find_all('tr'):
615+
columns = row.find_all('td')
616+
columns_text = [column.text.strip() for column in columns]
617+
columns_url = [column.find_all('a') for column in columns]
618+
619+
rank = int(columns_text[0])
620+
name = columns_text[1]
621+
pts = float(columns_text[-1])
622+
name_urls = columns_url[1]
623+
username = name_urls[0].get('href').split('/')[-1] # guaranteed to exist
624+
625+
if 'Subdivision' in headers:
626+
subdivision = columns_text[2]
627+
subdivision_urls = columns_url[2]
628+
subdivision_code = subdivision_urls[0].get('href').split('/')[-1] if subdivision_urls else None
629+
else:
630+
subdivision = None
622631

623-
if 'Country' in headers:
624-
country = columns_text[2]
625-
country_urls = columns_url[2]
626-
country_code = country_urls[0].get('href').split('/')[-1] if country_urls else None
627-
else:
628-
country = None
632+
if 'University' in headers:
633+
university = columns_text[-2]
634+
university_urls = columns_url[-2]
635+
university_code = university_urls[0].get('href').split('/')[-1] if university_urls else None
636+
else:
637+
university = None
629638

630-
if 'Subdivision' in headers:
631-
subdivision = columns_text[-2]
632-
subdivision_urls = columns_url[-2]
633-
subdivision_code = subdivision_urls[0].get('href').split('/')[-1] if subdivision_urls else None
634-
else:
635-
subdivision = None
639+
data.append({
640+
'rank': rank,
641+
'name': name,
642+
'username': username,
643+
'points': pts,
644+
'country_code': country_code,
645+
'country': COUNTRIES[country_code],
646+
'subdivision_code': subdivision_code if subdivision else None,
647+
'subdivision': subdivision if subdivision else None,
648+
'university_code': university_code if university else None,
649+
'university': university if university else None
650+
})
651+
else:
652+
university_code = guess_id(university, UNIVERSITIES)
653+
response = self.get(f'{self.BASE_URL}/universities/{university_code}')
654+
soup = bs(response.content, features='lxml')
655+
table = soup.find('table', class_='table2 report_grid-problems_table', id='top_users')
656+
if not table:
657+
return self.Result([])
658+
headers = [re.findall(r'[A-Za-z]+', h.text)[0] for h in table.find_all('th')]
659+
for row in table.tbody.find_all('tr'):
660+
columns = row.find_all('td')
661+
columns_text = [column.text.strip() for column in columns]
662+
columns_url = [column.find_all('a') for column in columns]
663+
664+
rank = int(columns_text[0])
665+
name = columns_text[1]
666+
pts = float(columns_text[-1])
667+
name_urls = columns_url[1]
668+
username = name_urls[0].get('href').split('/')[-1] # guaranteed to exist
669+
670+
if 'Country' in headers:
671+
country = columns_text[2]
672+
country_urls = columns_url[2]
673+
country_code = country_urls[0].get('href').split('/')[-1] if country_urls else None
674+
else:
675+
country = None
636676

637-
data.append({
638-
'rank': rank,
639-
'name': name,
640-
'username': username,
641-
'points': pts,
642-
'country_code': country_code if country else None,
643-
'country': country if country else None,
644-
'subdivision_code': subdivision_code if subdivision else None,
645-
'subdivision': subdivision if subdivision else None,
646-
'university_code': university_code,
647-
'university': UNIVERSITIES[university_code]
648-
})
677+
if 'Subdivision' in headers:
678+
subdivision = columns_text[-2]
679+
subdivision_urls = columns_url[-2]
680+
subdivision_code = subdivision_urls[0].get('href').split('/')[-1] if subdivision_urls else None
681+
else:
682+
subdivision = None
683+
684+
data.append({
685+
'rank': rank,
686+
'name': name,
687+
'username': username,
688+
'points': pts,
689+
'country_code': country_code if country else None,
690+
'country': country if country else None,
691+
'subdivision_code': subdivision_code if subdivision else None,
692+
'subdivision': subdivision if subdivision else None,
693+
'university_code': university_code,
694+
'university': UNIVERSITIES[university_code]
695+
})
649696
return self.Result(data)
650697

651698
@lru_cache
@@ -820,7 +867,7 @@ def assignments(self, offering_id, course_id=None):
820867
'problems': ','.join(pids)
821868
})
822869
name, status = truncate(asg.text.strip()).split('\n')
823-
status = status.replace('(', '').replace(')', '')
870+
status = status.replace('(', '').replace(')', '').strip()
824871
link = self.get_base_url() + asg.find('a').get('href')
825872
aid = link.split('/')[-1]
826873
pids = []

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
setup_args = dict(
77
name='autokattis',
8-
version='1.6',
8+
version='1.6.1',
99
description='Updated Kattis API wrapper',
1010
long_description_content_type="text/markdown",
1111
long_description=README,

test/main.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,11 @@
8585
print(df:=ret.to_df())
8686
df.to_csv('test_ranklist_default.csv', index=False)
8787

88+
print('=== TEST RANKLIST (TOP 100) ===')
89+
ret = kt.ranklist(top_100=True) # show top 100
90+
print(df:=ret.to_df())
91+
df.to_csv('test_ranklist_top100.csv', index=False)
92+
8893
print('=== TEST RANKLIST (COUNTRY) ===')
8994
ret = kt.ranklist(country='Singapore') # country leaderboard
9095
print(df:=ret.to_df())

test/nus.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,11 @@
8585
print(df:=ret.to_df())
8686
df.to_csv('test_nus_ranklist_default.csv', index=False)
8787

88+
print('=== TEST RANKLIST (TOP 100) ===')
89+
ret = kt.ranklist(top_100=True) # show top 100
90+
print(df:=ret.to_df())
91+
df.to_csv('test_nus_ranklist_top100.csv', index=False)
92+
8893
print('=== TEST RANKLIST (COUNTRY) ===')
8994
ret = kt.ranklist(country='Singapore') # country leaderboard
9095
print(df:=ret.to_df())

0 commit comments

Comments
 (0)