-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathscrape_open_review.py
82 lines (65 loc) · 2.34 KB
/
scrape_open_review.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
"""
Constructs the open_review distributions.
"""
from collections import defaultdict
import numpy as np
import openreview
import pandas as pd
# Module-level OpenReview API client, created at import time with only a base
# URL (no credentials are passed here). scrape() hands it to each
# openreview.tools.iterget_notes call.
client = openreview.Client(baseurl="https://api.openreview.net")
def _parse_rating(raw_rating):
    """Return the leading integer score of a review's rating value.

    Everything before the first ":" is converted to an int, so multi-digit
    scores are read in full (reading only the first character would turn a
    rating of 10 into 1).
    """
    # NOTE(review): assumes ratings look like "<score>: <description>" (or a
    # bare number) -- confirm against the ICLR review forms for each year.
    return int(str(raw_rating).split(":", 1)[0])


def scrape():
    """
    Uses the OpenReview API to scrape ICLR reviews.

    For each configured ICLR year, fetches the blind submissions and their
    official reviews, averages the review ratings per submission, and buckets
    the submission abstracts by that average.

    Returns:
        A tuple ``(great_papers, good_papers, bad_papers)`` of lists of
        abstracts, where great means average rating >= 7, good means
        5 <= average < 7, and bad means average < 5. Submissions without any
        official review have no average and appear in none of the lists.

    Raises:
        KeyError: if a review has no "rating" field or a submission has no
            "abstract" field.
    """
    # (year, submission invitation, review invitation pattern). The position
    # of the "-" segment in the review invitation differs between 2018/2019
    # and 2020/2021, so each year is spelled out explicitly.
    invites = [
        (
            2018,
            "ICLR.cc/2018/Conference/-/Blind_Submission",
            "ICLR.cc/2018/Conference/-/Paper.*/Official_Review",
        ),
        (
            2019,
            "ICLR.cc/2019/Conference/-/Blind_Submission",
            "ICLR.cc/2019/Conference/-/Paper.*/Official_Review",
        ),
        (
            2020,
            "ICLR.cc/2020/Conference/-/Blind_Submission",
            "ICLR.cc/2020/Conference/Paper.*/-/Official_Review",
        ),
        (
            2021,
            "ICLR.cc/2021/Conference/-/Blind_Submission",
            "ICLR.cc/2021/Conference/Paper.*/-/Official_Review",
        ),
    ]

    metadata = []
    for year, submission_invite, review_invite in invites:
        submissions = openreview.tools.iterget_notes(
            client, invitation=submission_invite
        )
        submissions_by_forum = {n.forum: n for n in submissions}

        # Group reviews by the forum (i.e. the submission) they belong to.
        reviews = openreview.tools.iterget_notes(client, invitation=review_invite)
        reviews_by_forum = defaultdict(list)
        for review in reviews:
            reviews_by_forum[review.forum].append(review)

        for forum, submission in submissions_by_forum.items():
            forum_reviews = reviews_by_forum.get(forum, [])
            if not forum_reviews:
                # No reviews means no average; np.mean([]) would only yield
                # NaN (plus a RuntimeWarning), which never lands in a bucket.
                continue

            review_ratings = [
                _parse_rating(n.content["rating"]) for n in forum_reviews
            ]
            metadata.append(
                {
                    "forum": forum,
                    "review_ratings": review_ratings,
                    "average_rating": np.mean(review_ratings),
                    "abstract": submission.content["abstract"],
                    "year": year,
                }
            )

    if not metadata:
        # An empty DataFrame would have no "average_rating" column to filter.
        return [], [], []

    df = pd.DataFrame(metadata)
    great_papers = df[df.average_rating >= 7].abstract.tolist()
    good_papers = df[
        (df.average_rating >= 5) & (df.average_rating < 7)
    ].abstract.tolist()
    bad_papers = df[df.average_rating < 5].abstract.tolist()
    return great_papers, good_papers, bad_papers