Skip to content

Commit 431c8e9

Browse files
committed
Update ingestion script
1 parent dbadf3d commit 431c8e9

File tree

1 file changed

+6
-20
lines changed

1 file changed

+6
-20
lines changed

bin/ingest_mitpress.py

Lines changed: 6 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -41,6 +41,8 @@
4141
from typing import List, Optional, Tuple
4242

4343
from acl_anthology import Anthology
44+
from acl_anthology.collections import VolumeType
45+
from acl_anthology.text import MarkupText
4446

4547
# from anthology.utils import make_simple_element, indent, compute_hash_from_file
4648

@@ -303,16 +305,7 @@ def main(args):
303305

304306
collection_id = str(year) + "." + venue
305307
if (collection := anthology.collections.get(collection_id)) is None:
306-
# TODO: Should provide convenience function for instantiating new collections
307-
from acl_anthology.collections import Collection
308-
309-
collection = Collection(
310-
id=collection_id,
311-
parent=anthology.collections,
312-
path=os.path.join(args.anthology_dir, "data", "xml", f"{collection_id}.xml"),
313-
)
314-
collection.is_data_loaded = True
315-
# ------
308+
collection = anthology.collections.create(collection_id)
316309

317310
papers = []
318311
for xml in sorted(args.root_dir.glob("*.xml")):
@@ -350,24 +343,18 @@ def sort_papers_by_page(paper_tuple):
350343
else:
351344
month = None
352345

353-
# TODO: encapsulate
354-
from acl_anthology.collections import Volume, VolumeType
355-
from acl_anthology.text import MarkupText
356-
357-
volume = Volume(
358-
id=issue,
359-
parent=collection,
346+
volume = collection.create_volume(
347+
issue,
348+
title=MarkupText.from_string(issue_info), # TODO: from LaTeX?
360349
type=VolumeType.JOURNAL,
361350
year=str(year),
362351
month=month,
363-
booktitle=MarkupText.from_string(issue_info), # TODO: from LaTeX?
364352
publisher="MIT Press",
365353
address="Cambridge, MA",
366354
venue_ids=[venue],
367355
journal_volume=volume,
368356
journal_issue=issue,
369357
)
370-
# ------
371358

372359
# Check if the paper is already present in the volume
373360
if any(paper.get("doi") == paper_dict["doi"] for paper in volume.papers()):
@@ -377,7 +364,6 @@ def sort_papers_by_page(paper_tuple):
377364
# TODO: encapsulate
378365
from acl_anthology.collections import Paper
379366
from acl_anthology.people import NameSpecification, Name
380-
from acl_anthology.text import MarkupText
381367

382368
paper = Paper(
383369
id=..., # should be determined automatically

0 commit comments

Comments
 (0)