Skip to content

Commit 044157d

Browse files
committed
feat: add Crossref PID provider
* register DOIs with Crossref, functionality analogous to DataCitePIDProvider * CrossrefXMLSerializer for metadata export * includes test coverage similar to DataCite PID provider
1 parent 49c8a2e commit 044157d

15 files changed

Lines changed: 2226 additions & 16 deletions

invenio_rdm_records/config.py

Lines changed: 66 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -513,11 +513,24 @@ def always_valid(identifier):
513513
client=providers.DataCiteClient("datacite", config_prefix="DATACITE"),
514514
label=_("DOI"),
515515
),
516+
# Crossref DOI provider
517+
providers.CrossrefPIDProvider(
518+
"crossref",
519+
client=providers.CrossrefClient("crossref", config_prefix="CROSSREF"),
520+
label=_("DOI"),
521+
),
516522
# DOI provider for externally managed DOIs
517523
providers.ExternalPIDProvider(
518524
"external",
519525
"doi",
520-
validators=[providers.BlockedPrefixes(config_names=["DATACITE_PREFIX"])],
526+
validators=[
527+
providers.BlockedPrefixes(
528+
config_names=[
529+
"DATACITE_PREFIX",
530+
"CROSSREF_PREFIX",
531+
]
532+
)
533+
],
521534
label=_("DOI"),
522535
),
523536
# OAI identifier
@@ -537,14 +550,15 @@ def always_valid(identifier):
537550
"""
538551

539552
RDM_PERSISTENT_IDENTIFIERS = {
540-
# DOI automatically removed if DATACITE_ENABLED is False.
553+
# DOI automatically removed if DATACITE_ENABLED and CROSSREF_ENABLED are False.
541554
"doi": {
542-
"providers": ["datacite", "external"],
555+
"providers": ["datacite", "crossref", "external"],
543556
"required": True,
544557
"label": _("DOI"),
545558
"validator": idutils.is_doi,
546559
"normalizer": idutils.normalize_doi,
547-
"is_enabled": providers.DataCitePIDProvider.is_enabled,
560+
"is_enabled": providers.DataCitePIDProvider.is_enabled
561+
or providers.CrossrefPIDProvider.is_enabled,
548562
"ui": {"default_selected": "yes"}, # "yes", "no" or "not_needed"
549563
},
550564
"oai": {
@@ -602,7 +616,7 @@ def always_valid(identifier):
602616
# Configuration for the DataCiteClient used by the DataCitePIDProvider
603617

604618
DATACITE_ENABLED = False
605-
"""Flag to enable/disable DOI registration."""
619+
"""Flag to enable/disable DataCite DOI registration."""
606620

607621
DATACITE_USERNAME = ""
608622
"""DataCite username."""
@@ -613,6 +627,9 @@ def always_valid(identifier):
613627
DATACITE_PREFIX = ""
614628
"""DataCite DOI prefix."""
615629

630+
DATACITE_ADDITIONAL_PREFIXES = []
631+
"""List of additional DataCite DOI prefixes supported for registration."""
632+
616633
DATACITE_TEST_MODE = True
617634
"""DataCite test mode enabled."""
618635

@@ -638,6 +655,50 @@ def make_doi(prefix, record):
638655
in DataCite XML format.
639656
"""
640657

658+
# Configuration for the CrossrefClient used by the CrossrefPIDProvider
659+
660+
CROSSREF_ENABLED = False
661+
"""Flag to enable/disable Crossref DOI registration."""
662+
663+
CROSSREF_USERNAME = ""
664+
"""Crossref username."""
665+
666+
CROSSREF_PASSWORD = ""
667+
"""Crossref password."""
668+
669+
CROSSREF_PREFIX = ""
670+
"""Crossref DOI prefix."""
671+
672+
CROSSREF_ADDITIONAL_PREFIXES = []
673+
"""List of additional Crossref DOI prefixes supported for registration."""
674+
675+
CROSSREF_DEPOSITOR = ""
676+
"""Crossref depositor name."""
677+
678+
CROSSREF_EMAIL = ""
679+
"""Crossref depositor email."""
680+
681+
CROSSREF_REGISTRANT = ""
682+
"""Crossref registrant."""
683+
684+
CROSSREF_TEST_MODE = True
685+
"""Crossref test mode enabled."""
686+
687+
CROSSREF_FORMAT = "{prefix}/{id}"
688+
"""A string used for formatting the DOI or a callable.
689+
690+
If set to a string, you can use ``{prefix}`` and ``{id}`` inside the string.
691+
692+
You can also provide a callable instead:
693+
694+
.. code-block:: python
695+
696+
def make_doi(prefix, record):
697+
return f"{prefix}/{record.pid.pid_value}"
698+
699+
CROSSREF_FORMAT = make_doi
700+
"""
701+
641702
#
642703
# Custom fields
643704
#

invenio_rdm_records/resources/serializers/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
from .bibtex import BibtexSerializer
2020
from .cff import CFFSerializer
2121
from .codemeta import CodemetaSerializer
22+
from .crossref import CrossrefXMLSerializer
2223
from .csl import CSLJSONSerializer, StringCitationSerializer
2324
from .csv import CSVRecordSerializer
2425
from .datacite import (
@@ -48,6 +49,7 @@
4849
__all__ = (
4950
"BibtexSerializer",
5051
"CFFSerializer",
52+
"CrossrefXMLSerializer",
5153
"CSLJSONSerializer",
5254
"CSVRecordSerializer",
5355
"DataCite43JSONSerializer",
Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
# -*- coding: utf-8 -*-
2+
#
3+
# Copyright (C) 2021-2024 CERN.
4+
# Copyright (C) 2026 Front Matter.
5+
#
6+
# Invenio-RDM-Records is free software; you can redistribute it and/or modify
7+
# it under the terms of the MIT License; see LICENSE file for more details.
8+
9+
"""Crossref Serializers for Invenio RDM Records."""
10+
11+
from commonmeta import (
12+
CrossrefError,
13+
CrossrefXMLSchema,
14+
Metadata,
15+
tostring,
16+
write_crossref_xml,
17+
)
18+
from flask import current_app
19+
from flask_resources import BaseListSchema, MarshmallowSerializer
20+
from flask_resources.serializers import SimpleSerializer
21+
22+
23+
class CrossrefXMLSerializer(MarshmallowSerializer):
    """Marshmallow based Crossref XML serializer for records."""

    def __init__(self, **options):
        """Constructor.

        :param options: keyword arguments forwarded to the parent serializer.
            ``encoder`` defaults to ``tostring`` when not supplied.
        """
        # Do not pass ``encoder=`` next to ``**options``: a caller-supplied
        # ``encoder`` would then be given twice and raise ``TypeError``.
        options.setdefault("encoder", tostring)
        super().__init__(
            format_serializer_cls=SimpleSerializer,
            object_schema_cls=CrossrefXMLSchema,
            list_schema_cls=BaseListSchema,
            **options,
        )

    def serialize_object(self, obj):
        """Serialize a single record to Crossref XML.

        Overrides the default to avoid double-encoding, since ``dump_obj``
        already returns the encoded XML document.

        :param obj: the record to serialize.
        :returns: whatever ``dump_obj`` returns for ``obj``.
        """
        return self.dump_obj(obj)

    def dump_obj(self, record, url=None):
        """Dump a single record.

        The ``CROSSREF_DEPOSITOR``, ``CROSSREF_EMAIL`` and
        ``CROSSREF_REGISTRANT`` config variables are used to populate the
        Crossref XML head element.

        :param record: Record instance (dict, Record model, or ChainObject).
        :param url: the landing page URL for the DOI.
            Falls back to ``SITE_UI_URL``/records/<id> if not provided.
        :returns: the output of ``tostring`` for the generated Crossref XML,
            or an empty string if a ``CrossrefError`` was raised (the error
            is logged).
        """
        if isinstance(record, dict):
            record_metadata = record.get("metadata")
            record_id = record.get("id")
        else:
            record_metadata = getattr(record, "metadata", None)
            record_id = getattr(record, "id", None)

        # Metadata may be missing or not dict-like; treat that as "no identifiers".
        metadata_get = getattr(record_metadata, "get", None)
        identifiers = metadata_get("identifiers", []) if callable(metadata_get) else []

        # Determine the URL that the DOI resolves to, in the following order:
        #
        # 1. identifier of type url in ``metadata.identifiers``
        #    (e.g. archived original content)
        # 2. The landing page URL passed by the PID service
        # 3. Default constructed from ``SITE_UI_URL`` and record ID
        #    (e.g. for Celery tasks or tests without UI endpoints)
        identifier_url = next(
            (
                item.get("identifier")
                for item in (identifiers or [])
                if item.get("scheme") == "url" and item.get("identifier") is not None
            ),
            None,
        )
        registered_url = identifier_url or url

        # ``not`` rather than ``is None`` so an empty-string URL also falls back.
        if not registered_url:
            site_url = current_app.config.get("SITE_UI_URL", "")
            if site_url and record_id:
                # Strip a trailing slash to avoid ``//records`` in the URL.
                registered_url = f"{site_url.rstrip('/')}/records/{record_id}"

        # Convert the metadata to crossref_xml format via the commonmeta
        # intermediary format. XML Schema validation errors raise CrossrefError.
        # ``metadata`` is pre-bound so the error handler can never hit an
        # unbound local if the conversion fails before it is assigned.
        metadata = None
        try:
            metadata = Metadata(
                record,
                via="inveniordm",
                url=registered_url,
            )
            crossref_xml = write_crossref_xml(metadata)
            head = {
                "depositor": current_app.config.get("CROSSREF_DEPOSITOR"),
                "email": current_app.config.get("CROSSREF_EMAIL"),
                "registrant": current_app.config.get("CROSSREF_REGISTRANT"),
            }
            return tostring(crossref_xml, head=head)
        except CrossrefError as e:
            current_app.logger.error(
                "CrossrefError while converting %s to Crossref XML: %s",
                getattr(metadata, "id", None) or record_id,
                e,
            )
            return ""

invenio_rdm_records/services/pids/providers/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,15 @@
99
"""PID Providers module."""
1010

1111
from .base import PIDProvider
12+
from .crossref import CrossrefClient, CrossrefPIDProvider
1213
from .datacite import DataCiteClient, DataCitePIDProvider
1314
from .external import BlockedPrefixes, ExternalPIDProvider
1415
from .oai import OAIPIDProvider
1516

1617
__all__ = (
1718
"BlockedPrefixes",
19+
"CrossrefClient",
20+
"CrossrefPIDProvider",
1821
"DataCiteClient",
1922
"DataCitePIDProvider",
2023
"ExternalPIDProvider",

0 commit comments

Comments
 (0)