Skip to content

Commit ddcbd2f

Browse files
Merge pull request #327 from scieloorg/copilot/fix-preprint-data-crossref
Crossref export: emit hasPreprint relation from related-article preprint entries
2 parents 99d6fa7 + 4a7b2a2 commit ddcbd2f

2 files changed

Lines changed: 120 additions & 0 deletions

File tree

articlemeta/export_crossref.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1134,6 +1134,36 @@ def transform(self, data):
11341134
data = self._transform_translations(data)
11351135
return data
11361136

1137+
@staticmethod
def _get_preprint_relations(raw):
    """Return the DOIs of related-article entries marked as preprint.

    SciELO stores related-article info in ISIS field ``v241``, with
    subfields ``i`` (identifier/href), ``t`` (related-article-type) and
    ``n`` (ext-link-type). Only entries whose type is ``preprint`` and
    whose link type is ``doi`` (or unspecified) carry a usable DOI for
    the Crossref ``hasPreprint`` relation.

    Each identifier is normalized before being returned: surrounding
    whitespace is removed and a leading ``doi.org`` / ``dx.doi.org``
    resolver URL or ``doi:`` scheme is stripped. Values that do not
    start with the DOI directory indicator ``10.`` after normalization
    are skipped, and repeated DOIs are returned only once (first
    occurrence wins).

    :param raw: document object expected to expose
        ``raw.data['article']`` as a dict-like object.
    :returns: list of bare DOI strings, in input order; an empty list
        when the field is missing or ``raw`` has an unexpected shape.
    """
    try:
        related = raw.data['article'].get('v241') or []
    except (AttributeError, KeyError, TypeError):
        return []

    # Forms under which a DOI may be written in an href; compared
    # case-insensitively and removed so that only the bare DOI remains.
    doi_prefixes = (
        'https://doi.org/',
        'http://doi.org/',
        'https://dx.doi.org/',
        'http://dx.doi.org/',
        'doi:',
    )

    preprints = []
    for item in related:
        if not isinstance(item, dict):
            continue
        if item.get('t') != 'preprint':
            continue
        ext_link_type = item.get('n')
        if ext_link_type and ext_link_type != 'doi':
            continue

        identifier = item.get('i') or item.get('_')
        try:
            doi = identifier.strip()
        except AttributeError:
            # Missing (None) or non-string identifier: nothing usable.
            continue

        lowered = doi.lower()
        for prefix in doi_prefixes:
            if lowered.startswith(prefix):
                doi = doi[len(prefix):].strip()
                break

        # Every DOI starts with the "10." directory indicator; anything
        # else must not be emitted with identifier-type="doi".
        if not doi.startswith('10.'):
            continue
        # Avoid emitting the same hasPreprint relation twice.
        if doi in preprints:
            continue
        preprints.append(doi)
    return preprints
1166+
11371167
def _transform_original(self, data):
11381168
raw, xml = data
11391169

@@ -1168,6 +1198,19 @@ def _transform_original(self, data):
11681198

11691199
program_node.append(related_item_node)
11701200

1201+
# program/related_item (hasPreprint)
1202+
for preprint_doi in self._get_preprint_relations(raw):
1203+
related_item_node = ET.Element('related_item')
1204+
1205+
intra_work_relation_node = ET.Element('intra_work_relation')
1206+
intra_work_relation_node.set(
1207+
'relationship-type', 'hasPreprint')
1208+
intra_work_relation_node.set('identifier-type', 'doi')
1209+
intra_work_relation_node.text = preprint_doi
1210+
related_item_node.append(intra_work_relation_node)
1211+
1212+
program_node.append(related_item_node)
1213+
11711214
journal_article_node.append(program_node)
11721215

11731216
return data

tests/test_export_crossref.py

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1342,6 +1342,83 @@ def test_related_item_for_multilingue_document(self):
13421342
content[3],
13431343
intra_work_relation.attrib.get('relationship-type'))
13441344

1345+
def test_related_item_includes_has_preprint_relation(self):
    """A ``v241`` preprint entry yields a ``hasPreprint`` relation.

    The relation must be attached to the first ``journal_article`` only;
    the remaining ``journal_article`` elements must not carry it.
    """
    self._article.data['article']['v241'] = [
        {
            'i': '10.1590/SciELOPreprints.9348',
            't': 'preprint',
            'n': 'doi',
        }
    ]
    crossref_xml = create_xmlcrossref_with_n_journal_article_element(
        ['pt', 'en', 'es'])

    pipe = export_crossref.XMLProgramRelatedItemPipe()
    raw, xml = pipe.transform([self._article, crossref_xml])

    articles = xml.findall('.//journal_article')

    # The main journal_article keeps its translations and gains the
    # preprint relation.
    main_relations = articles[0].find('program').findall(
        'related_item/intra_work_relation')
    main_types = []
    for relation in main_relations:
        main_types.append(relation.attrib.get('relationship-type'))
    self.assertIn('hasPreprint', main_types)

    # Safe to index: the assertion above guarantees at least one match.
    preprint_node = [
        relation for relation in main_relations
        if relation.attrib.get('relationship-type') == 'hasPreprint'
    ][0]
    self.assertEqual('doi', preprint_node.attrib.get('identifier-type'))
    self.assertEqual('10.1590/SciELOPreprints.9348', preprint_node.text)

    # Translations should not get the hasPreprint relation.
    for translation in articles[1:]:
        translation_types = []
        for relation in translation.findall(
                'program/related_item/intra_work_relation'):
            translation_types.append(
                relation.attrib.get('relationship-type'))
        self.assertNotIn('hasPreprint', translation_types)
1382+
1383+
def test_related_item_without_preprint_does_not_emit_has_preprint(self):
    """Without any ``v241`` entry no ``hasPreprint`` relation is emitted."""
    # Make sure the fixture carries no related-article data at all.
    self._article.data['article'].pop('v241', None)
    crossref_xml = create_xmlcrossref_with_n_journal_article_element(
        ['pt', 'en', 'es'])

    pipe = export_crossref.XMLProgramRelatedItemPipe()
    raw, xml = pipe.transform([self._article, crossref_xml])

    found_types = []
    for relation in xml.findall(
            './/program/related_item/intra_work_relation'):
        found_types.append(relation.attrib.get('relationship-type'))
    self.assertNotIn('hasPreprint', found_types)
1399+
1400+
def test_related_item_ignores_non_preprint_related_articles(self):
    """A ``v241`` entry whose type is not ``preprint`` is not exported."""
    commentary_entry = {
        'i': '10.1590/some-other.1234',
        't': 'commentary',
        'n': 'doi',
    }
    self._article.data['article']['v241'] = [commentary_entry]
    crossref_xml = create_xmlcrossref_with_n_journal_article_element(
        ['pt', 'en', 'es'])

    pipe = export_crossref.XMLProgramRelatedItemPipe()
    raw, xml = pipe.transform([self._article, crossref_xml])

    found_types = []
    for relation in xml.findall(
            './/program/related_item/intra_work_relation'):
        found_types.append(relation.attrib.get('relationship-type'))
    self.assertNotIn('hasPreprint', found_types)
1421+
13451422
def test_collection_for_multilingue_document(self):
13461423
xmlcrossref = create_xmlcrossref_with_n_journal_article_element(
13471424
['pt', 'en', 'es'], 'doi_data')

0 commit comments

Comments
 (0)