Skip to content

Commit f1fcaa3

Browse files
authored
Paper scoring system (#67)
* Add focus system for scoring
* Add mostly_latin and test_utils
* Increase default limit for OpenReview scraper
* Split/lowercase institutions before scoring
* Fix imports of PaperInfo, update dependencies
1 parent cb388cd commit f1fcaa3

24 files changed

+816
-356
lines changed

src/paperoni/__main__.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,9 @@
88
from gifnoc import add_overlay, cli
99
from serieux import Auto, Registered, Tagged, TaggedUnion, serialize, singleton
1010

11-
from .config import discoverers
12-
from .discovery.base import PaperInfo
11+
from .config import config, discoverers
1312
from .display import display, terminal_width
13+
from .model import PaperInfo
1414

1515

1616
class Formatter(Registered):
@@ -60,8 +60,14 @@ class Discover:
6060
# Output format
6161
format: Formatter = TerminalFormatter
6262

63+
# Top n entries
64+
top: int = 0
65+
6366
def run(self):
64-
self.format(self.command())
67+
papers = self.command()
68+
if self.top:
69+
papers = config.focuses.top(n=self.top, pinfos=papers)
70+
self.format(papers)
6571

6672
@dataclass
6773
class PaperoniInterface:

src/paperoni/config.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from serieux import TaggedSubclass
1010

1111
from .discovery.base import Discoverer
12+
from .model.focus import Focuses
1213

1314

1415
@dataclass
@@ -32,6 +33,7 @@ class PaperoniConfig:
3233
cache_path: Path = None
3334
requests: RequesterConfig = field(default_factory=RequesterConfig)
3435
discovery: dict[str, TaggedSubclass[Discoverer]] = field(default_factory=dict)
36+
focuses: Focuses = field(default_factory=Focuses)
3537

3638

3739
config = gifnoc.define(

src/paperoni/discovery/base.py

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,4 @@
11
from dataclasses import dataclass
2-
from datetime import datetime
3-
4-
from ..model.classes import Paper
52

63

74
@dataclass
@@ -11,11 +8,3 @@ class Discoverer:
118

129
class QueryError(Exception):
1310
pass
14-
15-
16-
@dataclass(kw_only=True)
17-
class PaperInfo:
18-
paper: Paper
19-
key: str
20-
update_key: str = None
21-
acquired: datetime

src/paperoni/discovery/jmlr.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,13 @@
1111
Link,
1212
Paper,
1313
PaperAuthor,
14+
PaperInfo,
1415
Release,
1516
Venue,
1617
VenueType,
1718
)
1819
from ..utils import asciiify
19-
from .base import Discoverer, PaperInfo
20+
from .base import Discoverer
2021

2122

2223
class JMLR(Discoverer):

src/paperoni/discovery/miniconf.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,13 @@
1212
Link,
1313
Paper,
1414
PaperAuthor,
15+
PaperInfo,
1516
Release,
1617
Topic,
1718
Venue,
1819
VenueType,
1920
)
20-
from .base import Discoverer, PaperInfo
21+
from .base import Discoverer
2122

2223
conference_urls = {
2324
"neurips": "neurips.cc",

src/paperoni/discovery/openalex.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,14 @@
1212
Link,
1313
Paper,
1414
PaperAuthor,
15+
PaperInfo,
1516
Release,
1617
Topic,
1718
Venue,
1819
VenueType,
1920
)
2021
from ..utils import QueryError, link_generators as LINK_GENERATORS
21-
from .base import Discoverer, PaperInfo
22+
from .base import Discoverer
2223

2324
# https://docs.openalex.org/api-entities/institutions/institution-object#type
2425
INSTITUTION_CATEGORY_MAPPING = {

src/paperoni/discovery/openreview.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,13 @@
1313
Link,
1414
Paper,
1515
PaperAuthor,
16+
PaperInfo,
1617
Release,
1718
Topic,
1819
Venue,
1920
VenueType,
2021
)
21-
from .base import Discoverer, PaperInfo
22+
from .base import Discoverer
2223

2324

2425
def extract_date(txt: str) -> dict | None:
@@ -511,7 +512,7 @@ def query(
511512
# Block size for fetching results
512513
block_size: int = 100,
513514
# Maximum number of results to return
514-
limit: int = 10000,
515+
limit: int = 100000,
515516
):
516517
"""Query OpenReview"""
517518
for api_version in self.api_versions:

src/paperoni/discovery/pmlr.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,13 @@
1010
Link,
1111
Paper,
1212
PaperAuthor,
13+
PaperInfo,
1314
Release,
1415
Venue,
1516
VenueType,
1617
)
1718
from ..utils import asciiify
18-
from .base import Discoverer, PaperInfo
19+
from .base import Discoverer
1920

2021

2122
def parse_paper(entry):

src/paperoni/discovery/semantic_scholar.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,13 @@
99
Link,
1010
Paper,
1111
PaperAuthor,
12+
PaperInfo,
1213
Release,
1314
Topic,
1415
Venue,
1516
VenueType,
1617
)
17-
from .base import Discoverer, PaperInfo, QueryError
18+
from .base import Discoverer, QueryError
1819

1920
external_ids_mapping = {
2021
"pubmedcentral": "pmc",

src/paperoni/model/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
Link,
88
Paper,
99
PaperAuthor,
10+
PaperInfo,
1011
Release,
1112
Topic,
1213
Venue,
@@ -22,6 +23,7 @@
2223
"Link",
2324
"Paper",
2425
"PaperAuthor",
26+
"PaperInfo",
2527
"Release",
2628
"Topic",
2729
"Venue",

0 commit comments

Comments
 (0)