Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 20 additions & 20 deletions src/paperoni/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@
from .fulltext.locate import URL, locate_all
from .fulltext.pdf import PDF, CachePolicies, get_pdf
from .heuristics import simplify_paper
from .model import Link, Paper, PaperInfo
from .model import Link, Paper
from .model.focus import Focuses, Scored, Top
from .model.merge import PaperWorkingSet, merge_all
from .model.utils import paper_has_updated
Expand Down Expand Up @@ -111,7 +111,7 @@ class Productor:
Any, FromEntryPoint("paperoni.discovery", wrap=lambda cls: Auto[cls.query])
]

async def iterate(self, **kwargs) -> AsyncGenerator[PaperInfo, None]:
async def iterate(self, **kwargs) -> AsyncGenerator[Paper, None]:
async for p in self.command(**kwargs):
send(discover=p)
yield p
Expand All @@ -128,11 +128,11 @@ class Discover(Productor):
top: int = 0

async def run(self):
typ = PaperInfo
typ = Paper
papers = [p async for p in self.iterate()]
if self.top:
papers = config.focuses.top(n=self.top, pinfos=papers)
typ = Scored[PaperInfo]
papers = config.focuses.top(n=self.top, papers=papers)
typ = Scored[Paper]
self.format(papers, typ=typ)


Expand Down Expand Up @@ -228,8 +228,8 @@ async def run(self):

if self.norm:
results = [
normalize_paper(pinfo.paper, force=self.force, **norm_args(self.norm))
for pinfo in results
normalize_paper(paper, force=self.force, **norm_args(self.norm))
for paper in results
]

if self.merge:
Expand Down Expand Up @@ -284,12 +284,12 @@ async def run(self, work: "Work"):
)
find.add(list(work.top))

async for pinfo in self.iterate(focuses=work.focuses):
if ex and pinfo.key in ex:
async for paper in self.iterate(focuses=work.focuses):
if ex and paper.key in ex:
continue

if found := find.find(pinfo.paper):
found.value.add(pinfo)
if found := find.find(paper):
found.value.add(paper)
new_score = work.focuses.score(found.value.current)
if new_score != found.score:
# Might be unnecessarily expensive but we'll see
Expand All @@ -299,29 +299,29 @@ async def run(self, work: "Work"):
col_paper = None
if (
work.collection
and (col_paper := await work.collection.find_paper(pinfo.paper))
and (col_paper := await work.collection.find_paper(paper))
and (
not self.check_paper_updates
or not paper_has_updated(col_paper, pinfo.paper)
or not paper_has_updated(col_paper, paper)
)
):
continue

if col_paper:
working_set = PaperWorkingSet.make(
PaperInfo(
paper=col_paper,
key=pinfo.key,
info=pinfo.info,
replace(
col_paper,
key=paper.key,
info=paper.info,
score=work.focuses.score(col_paper),
)
)
working_set.add(pinfo)
working_set.add(paper)
scored = Scored(work.focuses.score(working_set.current), working_set)

else:
scored = Scored(
work.focuses.score(pinfo), PaperWorkingSet.make(pinfo)
work.focuses.score(paper), PaperWorkingSet.make(paper)
)

if work.top.add(scored):
Expand Down Expand Up @@ -1086,7 +1086,7 @@ async def show_progress(sent, dash):
@outsight.add
async def show_paper_stats(sent, dash):
async for group in sent["discover"].roll(5, partial=True):
values = [f"{pinfo.paper.title}" for pinfo in group]
values = [f"{paper.title}" for paper in group]
dash["titles"] = History(values)

@outsight.add
Expand Down
29 changes: 11 additions & 18 deletions src/paperoni/discovery/jmlr.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
Link,
Paper,
PaperAuthor,
PaperInfo,
Release,
Venue,
VenueType,
Expand Down Expand Up @@ -47,20 +46,16 @@ async def query(
yield paper
return
name = name and asciiify(name).lower()
async for paper_info in self.get_volume(volume, cache):
async for paper in self.get_volume(volume, cache):
try:
if (
paper_info
and paper_info.paper
and (
name is None
or any(
asciiify(auth.author.name).lower() == name
for auth in paper_info.paper.authors
)
if paper and (
name is None
or any(
asciiify(auth.author.name).lower() == name
for auth in paper.authors
)
):
yield paper_info
yield paper
except Exception as exc:
traceback.print_exception(exc)

Expand Down Expand Up @@ -166,12 +161,10 @@ def absolutize(lnk):
jmlr_key = f"{volume}:{title[:50].replace(' ', '_').lower()}"
paper_key = f"jmlr:{jmlr_key}"

yield PaperInfo(
key=paper_key,
acquired=datetime.now(),
paper=paper,
info={"discovered_by": {"jmlr": jmlr_key}},
)
paper.key = paper_key
paper.version = datetime.now()
paper.info = {"discovered_by": {"jmlr": jmlr_key}}
yield paper

async def extract_volumes(
self, index, selector, map: Callable = None, filter: Callable = None
Expand Down
23 changes: 10 additions & 13 deletions src/paperoni/discovery/miniconf.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
Link,
Paper,
PaperAuthor,
PaperInfo,
Release,
Topic,
Venue,
Expand Down Expand Up @@ -199,18 +198,16 @@ def process_uri(media_type, uri):
mid = f"{conference}:{data['uid']}"

# Create and return Paper object
return PaperInfo(
return Paper(
key=f"miniconf:{mid}",
acquired=datetime.now(),
paper=Paper(
title=title,
abstract=abstract,
authors=authors,
releases=[release],
topics=topics,
links=list(links),
flags=set(),
),
version=datetime.now(),
title=title,
abstract=abstract,
authors=authors,
releases=[release],
topics=topics,
links=list(links),
flags=set(),
info={"discovered_by": {"miniconf": mid}},
)

Expand Down Expand Up @@ -341,7 +338,7 @@ def matches(paper):
venue_date=conference_date,
date_precision=date_precision,
)
if matches(paper.paper):
if matches(paper):
n += 1
yield paper
except Exception as e:
Expand Down
15 changes: 6 additions & 9 deletions src/paperoni/discovery/openalex.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
Link,
Paper,
PaperAuthor,
PaperInfo,
Release,
Topic,
Venue,
Expand Down Expand Up @@ -204,14 +203,14 @@ async def _evaluate(self, path: str, **params):
assert "results" in jdata
return jdata

def _try_wrapping_paper(self, data: dict) -> PaperInfo:
def _try_wrapping_paper(self, data: dict) -> Paper:
    """Wrap a raw record with ``_wrap_paper``, dumping the record on failure.

    Args:
        data: Raw record dict, passed through unchanged to
            ``self._wrap_paper``.

    Returns:
        Whatever ``self._wrap_paper(data)`` returns.

    Raises:
        Exception: Any error raised by ``_wrap_paper`` is re-raised
            unchanged, after the offending record has been pretty-printed
            to stderr.
    """
    try:
        return self._wrap_paper(data)
    except Exception:
        # Previously this called pprint.pformat(data) and threw the string
        # away, so the record that triggered the failure was never shown.
        # Write it to stderr (not stdout, which may carry formatted results)
        # and let the original exception propagate untouched.
        import sys

        pprint.pprint(data, stream=sys.stderr)
        raise

def _wrap_paper(self, data: dict) -> PaperInfo:
def _wrap_paper(self, data: dict) -> Paper:
# Assert consistency in locations
typ = data["type"]
if typ not in self.work_types:
Expand Down Expand Up @@ -366,12 +365,10 @@ def venue_name(loc):
openalex_id = data["id"].split("/")[-1]
paper_key = f"openalex:{openalex_id}"

return PaperInfo(
key=paper_key,
acquired=datetime.now(),
paper=paper,
info={"discovered_by": {"openalex": openalex_id}},
)
paper.key = paper_key
paper.version = datetime.now()
paper.info = {"discovered_by": {"openalex": openalex_id}}
return paper

@classmethod
def _reconstruct_abstract(cls, inverted: Dict[str, List[int]]) -> str:
Expand Down
65 changes: 31 additions & 34 deletions src/paperoni/discovery/openreview.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
Link,
Paper,
PaperAuthor,
PaperInfo,
Release,
Topic,
Venue,
Expand Down Expand Up @@ -321,40 +320,38 @@ async def _query(self, params, total=0, limit=1000000):
precision = DatePrecision.year
venue_data["venue"] += f" {year}"

yield PaperInfo(
yield Paper(
key=f"openreview:{note.id}",
acquired=datetime.now(),
paper=Paper(
title=self.get_content_field(note, "title"),
abstract=self.get_content_field(note, "abstract"),
authors=authors,
releases=[
Release(
venue=Venue(
type=type(self)._map_venue_type(vid),
name=vid,
series=venue_to_series(vid),
volume=venue_data["venue"],
date=the_date,
date_precision=precision,
links=[
Link(
type="openreview-venue",
link=vid,
)
],
aliases=[],
),
status=decision,
pages=None,
)
],
topics=[
Topic(name=kw)
for kw in self.get_content_field(note, "keywords", [])
],
links=_links,
),
version=datetime.now(),
title=self.get_content_field(note, "title"),
abstract=self.get_content_field(note, "abstract"),
authors=authors,
releases=[
Release(
venue=Venue(
type=type(self)._map_venue_type(vid),
name=vid,
series=venue_to_series(vid),
volume=venue_data["venue"],
date=the_date,
date_precision=precision,
links=[
Link(
type="openreview-venue",
link=vid,
)
],
aliases=[],
),
status=decision,
pages=None,
)
],
topics=[
Topic(name=kw)
for kw in self.get_content_field(note, "keywords", [])
],
links=_links,
info={"discovered_by": {"openreview": note.id}},
)
next_offset += len(notes)
Expand Down
29 changes: 11 additions & 18 deletions src/paperoni/discovery/pmlr.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
Link,
Paper,
PaperAuthor,
PaperInfo,
Release,
Venue,
VenueType,
Expand Down Expand Up @@ -69,12 +68,10 @@ def parse_paper(entry):
pmlr_key = f"v{entry['volume']}:{entry['id']}"
paper_key = f"pmlr:{pmlr_key}"

return PaperInfo(
key=paper_key,
acquired=datetime.now(),
paper=p,
info={"discovered_by": {"pmlr": pmlr_key}},
)
p.key = paper_key
p.version = datetime.now()
p.info = {"discovered_by": {"pmlr": pmlr_key}}
return p


class PMLR(Discoverer):
Expand All @@ -98,20 +95,16 @@ async def query(
async for paper in self.query(v, name, cache, focuses):
yield paper
return
async for paper_info in self.get_volume(volume, cache):
async for paper in self.get_volume(volume, cache):
try:
if (
paper_info
and paper_info.paper
and (
name is None
or any(
asciiify(auth.author.name).lower() == name
for auth in paper_info.paper.authors
)
if paper and (
name is None
or any(
asciiify(auth.author.name).lower() == name
for auth in paper.authors
)
):
yield paper_info
yield paper
except Exception as exc:
traceback.print_exception(exc)

Expand Down
Loading
Loading