Skip to content

Commit 8e81a13

Browse files
authored
Merge branch 'develop' into feature/field-constraints
2 parents 4f6bfba + 9412884 commit 8e81a13

3 files changed

Lines changed: 50 additions & 49 deletions

File tree

CHANGELOG.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,11 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
55
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
66

77
## [Unreleased]
8+
### Fixed
9+
- [749](https://github.com/thoth-pub/thoth/pull/749) - Correct locale code formatting in Crossref metadata output
810
### Changed
9-
- Require endorsement author names and featured video titles
11+
- [749](https://github.com/thoth-pub/thoth/pull/749) - Remove ISBN limit (introduced in v0.8.7) from Crossref metadata output
12+
- [748](https://github.com/thoth-pub/thoth/pull/748) - Require endorsement author names and featured video titles
1013

1114
## [[1.2.0]](https://github.com/thoth-pub/thoth/releases/tag/v1.2.0) - 2026-05-04
1215
### Added

thoth-export-server/src/xml/doideposit_crossref.rs

Lines changed: 34 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -20,21 +20,21 @@ pub struct DoiDepositCrossref {}
2020

2121
const DEPOSIT_ERROR: &str = "doideposit::crossref";
2222
const CROSSREF_NS: &[(&str, &str)] = &[
23-
("version", "5.3.1"),
24-
("xmlns", "http://www.crossref.org/schema/5.3.1"),
23+
("version", "5.4.0"),
24+
("xmlns", "http://www.crossref.org/schema/5.4.0"),
2525
("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance"),
2626
(
2727
"xsi:schemaLocation",
28-
"http://www.crossref.org/schema/5.3.1 http://www.crossref.org/schemas/crossref5.3.1.xsd",
28+
"http://www.crossref.org/schema/5.4.0 http://www.crossref.org/schemas/crossref5.4.0.xsd",
2929
),
3030
("xmlns:ai", "http://www.crossref.org/AccessIndicators.xsd"),
3131
("xmlns:jats", "http://www.ncbi.nlm.nih.gov/JATS1"),
3232
("xmlns:fr", "http://www.crossref.org/fundref.xsd"),
3333
];
3434

35-
// Output format based on schema documentation at https://data.crossref.org/reports/help/schema_doc/5.3.1/index.html
35+
// Output format based on schema documentation at https://data.crossref.org/reports/help/schema_doc/5.4.0/index.html
3636
// (retrieved via https://www.crossref.org/documentation/schema-library/xsd-schema-quick-reference/).
37-
// Output validity tested using tool at https://www.crossref.org/02publishers/parser.html
37+
// Output validity previously tested for schema version 5.3.1 using tool at https://www.crossref.org/02publishers/parser.html
3838
// (retrieved via https://www.crossref.org/documentation/member-setup/direct-deposit-xml/testing-your-xml/).
3939
impl XmlSpecification for DoiDepositCrossref {
4040
fn handle_event<W: Write>(w: &mut EventWriter<W>, works: &[Work]) -> ThothResult<()> {
@@ -416,6 +416,7 @@ fn write_abstract_content_with_locale_code<W: Write>(
416416
locale_code: &str,
417417
w: &mut EventWriter<W>,
418418
) -> ThothResult<()> {
419+
let xml_lang = locale_code.replace('_', "-");
419420
let normalised_content = normalise_crossref_abstract_jats(abstract_content).map_err(|err| {
420421
ThothError::IncompleteMetadataRecord(
421422
DEPOSIT_ERROR.to_string(),
@@ -426,7 +427,7 @@ fn write_abstract_content_with_locale_code<W: Write>(
426427
"jats:abstract",
427428
Some(vec![
428429
("abstract-type", abstract_type),
429-
("xml:lang", locale_code),
430+
("xml:lang", xml_lang.as_str()),
430431
]),
431432
w,
432433
|w| write_jats_content(&normalised_content, w),
@@ -495,32 +496,13 @@ fn write_publication_date_content<W: Write>(
495496
}
496497

497498
fn write_work_publications<W: Write>(work: &Work, w: &mut EventWriter<W>) -> ThothResult<()> {
498-
let mut publications: Vec<WorkPublications> = work
499+
let publications: Vec<WorkPublications> = work
499500
.publications
500501
.clone()
501502
.into_iter()
502503
.filter(|p| p.isbn.is_some())
503504
.collect();
504505
if !publications.is_empty() {
505-
// Workaround for CrossRef's limit of 6 on the number of ISBNs permissible within a deposit file.
506-
// We raised this with CrossRef and they believe they should be able to increase the limit.
507-
// Remove this workaround once this is done (see https://github.com/thoth-pub/thoth/issues/379).
508-
// This was previously encountered with OBP works, which used to have 7 ISBNs as standard,
509-
// but currently have 5 as of August 2024.
510-
// So, the logic below should never be necessary with current publishers in Thoth.
511-
// The least important ISBN is the HTML ISBN, so omit it.
512-
if publications.len() > 6 {
513-
if let Some(html_index) = publications
514-
.iter()
515-
.position(|p| p.publication_type == PublicationType::HTML)
516-
{
517-
publications.swap_remove(html_index);
518-
}
519-
}
520-
// If there are still more than 6 ISBNs, assume they were added in decreasing order of importance.
521-
while publications.len() > 6 {
522-
publications.pop();
523-
}
524506
for publication in &publications {
525507
XmlElementBlock::<DoiDepositCrossref>::xml_element(publication, w)?;
526508
}
@@ -2542,6 +2524,24 @@ mod tests {
25422524
assert!(output.contains(r#"<jats:p>First line</jats:p>"#));
25432525
assert!(output.contains(r#"<jats:p>Second line</jats:p>"#));
25442526
assert!(!output.contains(r#"<jats:break"#));
2527+
2528+
// Locale codes written to xml:lang should use BCP 47 hyphen separators.
2529+
let mut buffer = Vec::new();
2530+
let mut writer = xml::writer::EmitterConfig::new()
2531+
.perform_indent(true)
2532+
.create_writer(&mut buffer);
2533+
2534+
let result = write_abstract_content_with_locale_code(
2535+
"<p>Translated abstract.</p>",
2536+
"long",
2537+
"ZH_CN",
2538+
&mut writer,
2539+
);
2540+
2541+
assert!(result.is_ok());
2542+
let output = String::from_utf8(buffer).unwrap();
2543+
assert!(output.contains(r#"<jats:abstract abstract-type="long" xml:lang="ZH-CN">"#));
2544+
assert!(!output.contains(r#"xml:lang="ZH_CN""#));
25452545
}
25462546

25472547
#[test]
@@ -2564,10 +2564,10 @@ mod tests {
25642564
}
25652565

25662566
#[test]
2567-
// Test that no more than 6 ISBNs are ever output.
2568-
// Remove/change this test once the CrossRef 6-ISBN limit is removed/increased -
2569-
// at this point, we need to remove the workaround and ensure that all ISBNs are included.
2570-
fn test_doideposit_crossref_isbns_workaround() {
2567+
// Crossref previously limited the number of ISBNs that could be included in a deposit file to 6,
2568+
// but this has now been increased in schema version 5.4.0 to 100 (which will never become relevant).
2569+
// Ensure that our own limit on the number of ISBNs output has been removed accordingly.
2570+
fn test_doideposit_crossref_isbns_workaround_removed() {
25712571
let mut test_work = Work {
25722572
work_id: Uuid::from_str("00000000-0000-0000-AAAA-000000000001").unwrap(),
25732573
work_status: WorkStatus::ACTIVE,
@@ -2795,27 +2795,27 @@ mod tests {
27952795
references: vec![],
27962796
};
27972797

2798-
// 7 ISBNs are present and one is HTML - confirm that it is omitted
2798+
// 7 ISBNs are present - confirm that all are included regardless of type
27992799
let output = generate_test_output(true, &test_work);
28002800
assert!(output.contains(r#" <isbn media_type="print">978-1-78839-908-1</isbn>"#));
28012801
assert!(output.contains(r#" <isbn media_type="print">978-1-7343145-0-2</isbn>"#));
28022802
assert!(output.contains(r#" <isbn media_type="electronic">978-0-07-063546-3</isbn>"#));
28032803
assert!(output.contains(r#" <isbn media_type="electronic">978-1-56619-909-4</isbn>"#));
28042804
assert!(output.contains(r#" <isbn media_type="electronic">978-92-95055-02-5</isbn>"#));
28052805
assert!(output.contains(r#" <isbn media_type="electronic">978-1-4028-9462-6</isbn>"#));
2806-
assert!(!output.contains(r#" <isbn media_type="electronic">978-3-16-148410-0</isbn>"#));
2806+
assert!(output.contains(r#" <isbn media_type="electronic">978-3-16-148410-0</isbn>"#));
28072807

28082808
// Change the HTML publication to a different format
28092809
test_work.publications[0].publication_type = PublicationType::MOBI;
2810-
// 7 ISBNs are present and none are HTML - confirm that the last one is omitted
2810+
// 7 ISBNs are present and none are HTML - confirm that all are included regardless of type
28112811
let output = generate_test_output(true, &test_work);
28122812
assert!(output.contains(r#" <isbn media_type="electronic">978-3-16-148410-0</isbn>"#));
28132813
assert!(output.contains(r#" <isbn media_type="print">978-1-78839-908-1</isbn>"#));
28142814
assert!(output.contains(r#" <isbn media_type="print">978-1-7343145-0-2</isbn>"#));
28152815
assert!(output.contains(r#" <isbn media_type="electronic">978-0-07-063546-3</isbn>"#));
28162816
assert!(output.contains(r#" <isbn media_type="electronic">978-1-56619-909-4</isbn>"#));
28172817
assert!(output.contains(r#" <isbn media_type="electronic">978-92-95055-02-5</isbn>"#));
2818-
assert!(!output.contains(r#" <isbn media_type="electronic">978-1-4028-9462-6</isbn>"#));
2818+
assert!(output.contains(r#" <isbn media_type="electronic">978-1-4028-9462-6</isbn>"#));
28192819
}
28202820

28212821
#[test]

thoth-export-server/src/xml/onix3_thoth.rs

Lines changed: 12 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ use cc_license::License;
22
use chrono::Utc;
33
use std::io::Write;
44
use thoth_api::model::language::LanguageCode as ApiLanguageCode;
5+
use thoth_api::model::locale::LocaleCode as ApiLocaleCode;
56
use thoth_client::{
67
AbstractType, AccessibilityException, AccessibilityStandard, ContactType, ContributionType,
78
LanguageRelation, LocationPlatform, PublicationType, RelationType, SubjectType, Work,
@@ -10,9 +11,8 @@ use thoth_client::{
1011
};
1112
use xml::writer::{EventWriter, XmlEvent};
1213

13-
use super::{write_element_block, XmlElement, XmlSpecification};
14+
use super::{write_element_block, TitleData, XmlElement, XmlSpecification};
1415
use crate::xml::{write_full_element_block, XmlElementBlock, ONIX3_NS};
15-
use thoth_api::model::locale::LocaleCode as ApiLocaleCode;
1616
use thoth_errors::{ThothError, ThothResult};
1717

1818
#[derive(Copy, Clone)]
@@ -382,12 +382,13 @@ impl XmlElementBlock<Onix3Thoth> for Work {
382382
write_element_block("TitleElementLevel", w, |w| {
383383
w.write(XmlEvent::Characters("01")).map_err(|e| e.into())
384384
})?;
385+
let api_locale: ApiLocaleCode =
386+
canonical_title.locale_code().clone().into();
387+
let lang_code: ApiLanguageCode = api_locale.into();
388+
let iso_code = lang_code.to_string().to_lowercase();
385389
write_full_element_block(
386390
"TitleText",
387-
Some(vec![(
388-
"language",
389-
&canonical_title.locale_code.to_string(),
390-
)]),
391+
Some(vec![("language", &iso_code)]),
391392
w,
392393
|w| {
393394
w.write(XmlEvent::Characters(&canonical_title.title))
@@ -397,10 +398,7 @@ impl XmlElementBlock<Onix3Thoth> for Work {
397398
if let Some(subtitle) = &canonical_title.subtitle {
398399
write_full_element_block(
399400
"Subtitle",
400-
Some(vec![(
401-
"language",
402-
&canonical_title.locale_code.to_string(),
403-
)]),
401+
Some(vec![("language", &iso_code)]),
404402
w,
405403
|w| {
406404
w.write(XmlEvent::Characters(subtitle))
@@ -2877,8 +2875,8 @@ mod tests {
28772875
<TitleType>01</TitleType>
28782876
<TitleElement>
28792877
<TitleElementLevel>01</TitleElementLevel>
2880-
<TitleText language="EN">Book Title</TitleText>
2881-
<Subtitle language="EN">Book Subtitle</Subtitle>
2878+
<TitleText language="eng">Book Title</TitleText>
2879+
<Subtitle language="eng">Book Subtitle</Subtitle>
28822880
</TitleElement>
28832881
</TitleDetail>
28842882
<EditionNumber>2</EditionNumber>
@@ -3529,11 +3527,11 @@ mod tests {
35293527
<TitleType>01</TitleType>
35303528
<TitleElement>
35313529
<TitleElementLevel>01</TitleElementLevel>
3532-
<TitleText language="EN">Book Title</TitleText>
3530+
<TitleText language="eng">Book Title</TitleText>
35333531
</TitleElement>
35343532
</TitleDetail>"#
35353533
));
3536-
assert!(!output.contains(r#" <Subtitle language="EN">Book Subtitle</Subtitle>"#));
3534+
assert!(!output.contains(r#" <Subtitle language="eng">Book Subtitle</Subtitle>"#));
35373535
assert!(!output.contains(r#" <Edition>"#));
35383536
assert!(!output.contains(r#" <Subtitle>Book Subtitle</Subtitle>"#));
35393537
assert!(!output.contains(r#" <EditionNumber>1</EditionNumber>"#));

0 commit comments

Comments
 (0)