Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 34 additions & 7 deletions invenio_rdm_records/resources/serializers/csl/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,13 +55,37 @@ def _clean_result(text):
text = re.sub(r"\.\.+", ".", text)
return text

def _replace_doi_link(text, doi, new_doi_link):
"""Replace the citation DOI link with the correct one.

Citation styles that generate a DOI URL in their citation generate it with the
form: "https://doi.org/<prefix>/<suffix>". However, when using a
Datacite test account (i.e. when `DATACITE_TEST_MODE = True`) and
potentially when using other providers' test accounts, the actual DOI URL is
of a different form: "https://handle.test.datacite.org/<prefix>/<suffix> as of
writing. By relying on a passed DOI URL instead, we can make sure the
correct URL is used. The DOI url is passed in the namespaced entry
json["_extras"]["links"]["doi"].
"""
if doi and new_doi_link:
return text.replace(f"https://doi.org/{doi}", new_doi_link)
else:
return text

extras = json.pop("_extras", {})
source = CiteProcJSON([json])
citation_style = CitationStylesStyle(validate=False, style=style, locale=locale)
bib = CitationStylesBibliography(citation_style, source, formatter.plain)
citation = Citation([CitationItem(id)])
bib.register(citation)

return _clean_result(str(bib.bibliography()[0]))
citation_raw = str(bib.bibliography()[0])
citation_doi_replaced = _replace_doi_link(
citation_raw,
json.get("DOI"),
extras.get("links", {}).get("doi"),
)
return _clean_result(citation_doi_replaced)


def get_style_location(style):
Expand Down Expand Up @@ -113,25 +137,28 @@ def __init__(self, url_args_retriever, **options):
self.url_args_retriever = url_args_retriever

def serialize_object(self, record):
"""Serialize a single record.
"""Serialize the output of a RecordItem.to_dict() to a citation string.

:param record: Record instance.
:param record: dict from RecordItem.to_dict().
"""
style, locale = (
self.url_args_retriever()
if callable(self.url_args_retriever)
else self.url_args_retriever
)

# set defaults if params are not provided
style = style or self._default_style
locale = locale or self._default_locale

style_filepath = get_style_location(style)

return get_citation_string(
self.dump_obj(record), record["id"], style_filepath, locale
)
# Pass the record links under _extras namespace
# so that DOI link can be replaced
record_dumped = self.dump_obj(record)
record_dumped.setdefault("_extras", {})
record_dumped["_extras"]["links"] = record.get("links", {})

return get_citation_string(record_dumped, record["id"], style_filepath, locale)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just for my understanding: Effectively, the "protocol" for get_citation_string() gets extended so that if an _extras field with a DOI link is present, then that will be used to replace the default doi.org link?
In this code path (serialize_object()) that will always be the case; external calls can use it if they want, or keep using it without (in which case the logic is backwards-compatible).

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Effectively, the "protocol" for get_citation_string() gets extended so that if an _extras field with a DOI link is present, then that will be used to replace the default doi.org link?

Yes, that's right. I didn't want to add more keyword arguments to get_citation_string, as its signature would start to look like a Christmas tree, and the DOI link seemed to really belong with the serialized record. All the replacement logic and the fetching of that data are backwards compatible and cautious, so nothing is altered unduly.


def serialize_object_list(self, records):
"""Serialize a list of records.
Expand Down
15 changes: 13 additions & 2 deletions invenio_rdm_records/services/components/record_deletion.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,22 @@ def delete_record(self, identity, data=None, record=None, **kwargs):
default_citation_style = current_app.config.get(
"RDM_CITATION_STYLES_DEFAULT", "apa"
)

serializer = CSLJSONSerializer()
style = get_style_location(default_citation_style)

# We can't rely on the pure citation generated DOI link since it may be
# incorrect in test environment for Datacite. So we pass the correct DOI
# link (or None if doesn't exist) in the dumped record to
# get_citation_string and let it do the replacement appropriately.
record_dumped = serializer.dump_obj(record)
record_dumped.setdefault("_extras", {})
link_for_doi = self.service.links_item_tpl.expand(identity, record).get(
"doi"
)
record_dumped["_extras"]["links"] = {"doi": link_for_doi}

default_citation = get_citation_string(
serializer.dump_obj(record),
record_dumped,
record.pid.pid_value,
style,
locale=current_i18n.language,
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ install_requires =
invenio-checks>=4.0.0,<5.0.0
invenio-communities>=23.0.0,<24.0.0
invenio-drafts-resources>=8.0.0,<9.0.0
invenio-records-resources>=9.0.0,<10.0.0
invenio-records-resources>=9.1.0,<10.0.0
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just wondering; this PR looks pretty self-contained, I couldn't find anything that obviously needs a dependency bump?
If it's useful, it can of course stay.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah yeah, this wasn't directly related to this PR, but I bumped into it when running tests: commit 179949e introduced an import of DateFacet, but DateFacet is only available in invenio-records-resources 9.1.0+, so that dependency had to be bumped.

invenio-github>=5.0.0,<6.0.0
invenio-i18n>=3.0.0,<4.0.0
invenio-jobs>=7.0.0,<8.0.0
Expand Down
4 changes: 2 additions & 2 deletions tests/resources/serializers/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ def full_record_to_dict():
"archive_media": "https://127.0.0.1:5000/api/records/12345-abcde/media-files-archive", # noqa
"communities": "https://127.0.0.1:5000/api/records/12345-abcde/communities",
"communities-suggestions": "https://127.0.0.1:5000/api/records/12345-abcde/communities-suggestions", # noqa
"doi": "https://handle.stage.datacite.org/10.1234/inveniordm.1234",
"doi": "https://handle.test.datacite.org/10.1234/inveniordm.1234",
"draft": "https://127.0.0.1:5000/api/records/12345-abcde/draft",
"files": "https://127.0.0.1:5000/api/records/12345-abcde/files",
"latest": "https://127.0.0.1:5000/api/records/12345-abcde/versions/latest",
Expand Down Expand Up @@ -501,7 +501,7 @@ def minimal_record_to_dict():
"archive_media": "https://127.0.0.1:5000/api/records/67890-fghij/media-files-archive", # noqa
"communities": "https://127.0.0.1:5000/api/records/67890-fghij/communities",
"communities-suggestions": "https://127.0.0.1:5000/api/records/67890-fghij/communities-suggestions", # noqa
"doi": "https://handle.stage.datacite.org/10.1234/67890-fghij",
"doi": "https://handle.test.datacite.org/10.1234/67890-fghij",
"draft": "https://127.0.0.1:5000/api/records/67890-fghij/draft",
"files": "https://127.0.0.1:5000/api/records/67890-fghij/files",
"latest": "https://127.0.0.1:5000/api/records/67890-fghij/versions/latest",
Expand Down
15 changes: 15 additions & 0 deletions tests/resources/serializers/test_csl_serializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,20 @@
from invenio_rdm_records.resources.serializers.csl.schema import CSLJSONSchema


def test_string_citation_serializer(running_app, full_record_to_dict):
serializer = StringCitationSerializer(
url_args_retriever=("modern-language-association", "en-US")
)

result = serializer.serialize_object(full_record_to_dict)

expected = (
"Nielsen, L. H.and B. Tom. Inveniordm. v1.0, InvenioRDM, 2018–Sept. 2020, "
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think that'd be an issue in citeproc, but is there a space missing between the end of the first author and the "and"? 👀

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, good eye on that one. It is indeed citeproc's MLA citation that comes out like that, e.g. https://zenodo.org/records/18927380 (select MLA) :/ Not much we can do for now.

"https://handle.test.datacite.org/10.1234/inveniordm.1234."
)
assert expected == result


def test_csl_json_serializer(running_app, full_record_to_dict):
"""Test JSON CLS Serializer."""
# if the record is created this field will be present
Expand Down Expand Up @@ -72,6 +86,7 @@ def test_citation_string_serializer_records_list(
for _ in range(3):
draft = service.create(superuser_identity, minimal_record)
record = service.publish(superuser_identity, draft.id)

expected_record_data = get_citation_string(
CSLJSONSchema().dump(record),
record.id,
Expand Down
9 changes: 6 additions & 3 deletions tests/resources/serializers/test_datapackage_serializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def test_data_package_serializer_minimal_record(minimal_record_to_dict):
serialized_record = serializer.dump_obj(minimal_record_to_dict)
assert serialized_record == {
"$schema": "https://datapackage.org/profiles/2.0/datapackage.json",
"id": "https://handle.stage.datacite.org/10.1234/67890-fghij",
"id": "https://handle.test.datacite.org/10.1234/67890-fghij",
"name": "67890-fghij",
"title": "A Romans story",
"created": "2023-11-14T19:33:09.837080+00:00",
Expand All @@ -46,10 +46,12 @@ def test_data_package_serializer_minimal_record(minimal_record_to_dict):

def test_data_package_serializer_full_record(full_record_to_dict):
serializer = DataPackageSerializer()

serialized_record = serializer.dump_obj(full_record_to_dict)
assert serialized_record == {

expected = {
"$schema": "https://datapackage.org/profiles/2.0/datapackage.json",
"id": "https://handle.stage.datacite.org/10.1234/inveniordm.1234",
"id": "https://handle.test.datacite.org/10.1234/inveniordm.1234",
"name": "12345-abcde",
"title": "InvenioRDM",
"description": "<h1>A description</h1> <p>with HTML tags</p>",
Expand Down Expand Up @@ -106,3 +108,4 @@ def test_data_package_serializer_full_record(full_record_to_dict):
},
],
}
assert expected == serialized_record
Loading