@@ -20,21 +20,21 @@ pub struct DoiDepositCrossref {}
2020
2121const DEPOSIT_ERROR : & str = "doideposit::crossref" ;
2222const CROSSREF_NS : & [ ( & str , & str ) ] = & [
23- ( "version" , "5.3.1 " ) ,
24- ( "xmlns" , "http://www.crossref.org/schema/5.3.1 " ) ,
23+ ( "version" , "5.4.0 " ) ,
24+ ( "xmlns" , "http://www.crossref.org/schema/5.4.0 " ) ,
2525 ( "xmlns:xsi" , "http://www.w3.org/2001/XMLSchema-instance" ) ,
2626 (
2727 "xsi:schemaLocation" ,
28- "http://www.crossref.org/schema/5.3.1 http://www.crossref.org/schemas/crossref5.3.1 .xsd" ,
28+ "http://www.crossref.org/schema/5.4.0 http://www.crossref.org/schemas/crossref5.4.0 .xsd" ,
2929 ) ,
3030 ( "xmlns:ai" , "http://www.crossref.org/AccessIndicators.xsd" ) ,
3131 ( "xmlns:jats" , "http://www.ncbi.nlm.nih.gov/JATS1" ) ,
3232 ( "xmlns:fr" , "http://www.crossref.org/fundref.xsd" ) ,
3333] ;
3434
35- // Output format based on schema documentation at https://data.crossref.org/reports/help/schema_doc/5.3.1/index.html
35+ // Output format based on schema documentation at https://data.crossref.org/reports/help/schema_doc/5.4.0/index.html
3636// (retrieved via https://www.crossref.org/documentation/schema-library/xsd-schema-quick-reference/).
37- // Output validity tested using tool at https://www.crossref.org/02publishers/parser.html
37+ // Output validity previously tested for schema version 5.3.1 using tool at https://www.crossref.org/02publishers/parser.html
3838// (retrieved via https://www.crossref.org/documentation/member-setup/direct-deposit-xml/testing-your-xml/).
3939impl XmlSpecification for DoiDepositCrossref {
4040 fn handle_event < W : Write > ( w : & mut EventWriter < W > , works : & [ Work ] ) -> ThothResult < ( ) > {
@@ -416,6 +416,7 @@ fn write_abstract_content_with_locale_code<W: Write>(
416416 locale_code : & str ,
417417 w : & mut EventWriter < W > ,
418418) -> ThothResult < ( ) > {
419+ let xml_lang = locale_code. replace ( '_' , "-" ) ;
419420 let normalised_content = normalise_crossref_abstract_jats ( abstract_content) . map_err ( |err| {
420421 ThothError :: IncompleteMetadataRecord (
421422 DEPOSIT_ERROR . to_string ( ) ,
@@ -426,7 +427,7 @@ fn write_abstract_content_with_locale_code<W: Write>(
426427 "jats:abstract" ,
427428 Some ( vec ! [
428429 ( "abstract-type" , abstract_type) ,
429- ( "xml:lang" , locale_code ) ,
430+ ( "xml:lang" , xml_lang . as_str ( ) ) ,
430431 ] ) ,
431432 w,
432433 |w| write_jats_content ( & normalised_content, w) ,
@@ -495,32 +496,13 @@ fn write_publication_date_content<W: Write>(
495496}
496497
497498fn write_work_publications < W : Write > ( work : & Work , w : & mut EventWriter < W > ) -> ThothResult < ( ) > {
498- let mut publications: Vec < WorkPublications > = work
499+ let publications: Vec < WorkPublications > = work
499500 . publications
500501 . clone ( )
501502 . into_iter ( )
502503 . filter ( |p| p. isbn . is_some ( ) )
503504 . collect ( ) ;
504505 if !publications. is_empty ( ) {
505- // Workaround for CrossRef's limit of 6 on the number of ISBNs permissible within a deposit file.
506- // We raised this with CrossRef and they believe they should be able to increase the limit.
507- // Remove this workaround once this is done (see https://github.com/thoth-pub/thoth/issues/379).
508- // This was previously encountered with OBP works, which used to have 7 ISBNs as standard,
509- // but currently have 5 as of August 2024.
510- // So, the logic below should never be necessary with current publishers in Thoth.
511- // The least important ISBN is the HTML ISBN, so omit it.
512- if publications. len ( ) > 6 {
513- if let Some ( html_index) = publications
514- . iter ( )
515- . position ( |p| p. publication_type == PublicationType :: HTML )
516- {
517- publications. swap_remove ( html_index) ;
518- }
519- }
520- // If there are still more than 6 ISBNs, assume they were added in decreasing order of importance.
521- while publications. len ( ) > 6 {
522- publications. pop ( ) ;
523- }
524506 for publication in & publications {
525507 XmlElementBlock :: < DoiDepositCrossref > :: xml_element ( publication, w) ?;
526508 }
@@ -2542,6 +2524,24 @@ mod tests {
25422524 assert ! ( output. contains( r#"<jats:p>First line</jats:p>"# ) ) ;
25432525 assert ! ( output. contains( r#"<jats:p>Second line</jats:p>"# ) ) ;
25442526 assert ! ( !output. contains( r#"<jats:break"# ) ) ;
2527+
2528+ // Locale codes written to xml:lang should use BCP 47 hyphen separators.
2529+ let mut buffer = Vec :: new ( ) ;
2530+ let mut writer = xml:: writer:: EmitterConfig :: new ( )
2531+ . perform_indent ( true )
2532+ . create_writer ( & mut buffer) ;
2533+
2534+ let result = write_abstract_content_with_locale_code (
2535+ "<p>Translated abstract.</p>" ,
2536+ "long" ,
2537+ "ZH_CN" ,
2538+ & mut writer,
2539+ ) ;
2540+
2541+ assert ! ( result. is_ok( ) ) ;
2542+ let output = String :: from_utf8 ( buffer) . unwrap ( ) ;
2543+ assert ! ( output. contains( r#"<jats:abstract abstract-type="long" xml:lang="ZH-CN">"# ) ) ;
2544+ assert ! ( !output. contains( r#"xml:lang="ZH_CN""# ) ) ;
25452545 }
25462546
25472547 #[ test]
@@ -2564,10 +2564,10 @@ mod tests {
25642564 }
25652565
25662566 #[ test]
2567- // Test that no more than 6 ISBNs are ever output.
2568- // Remove/change this test once the CrossRef 6-ISBN limit is removed/increased -
2569- // at this point, we need to remove the workaround and ensure that all ISBNs are included.
2570- fn test_doideposit_crossref_isbns_workaround ( ) {
2567+ // Crossref previously limited the number of ISBNs that could be included in a deposit file to 6,
2568+ // but this has now been increased in schema version 5.4.0 to 100 (which will never become relevant).
2569+ // Ensure that our own limit on the number of ISBNs output has been removed accordingly.
2570+ fn test_doideposit_crossref_isbns_workaround_removed ( ) {
25712571 let mut test_work = Work {
25722572 work_id : Uuid :: from_str ( "00000000-0000-0000-AAAA-000000000001" ) . unwrap ( ) ,
25732573 work_status : WorkStatus :: ACTIVE ,
@@ -2795,27 +2795,27 @@ mod tests {
27952795 references : vec ! [ ] ,
27962796 } ;
27972797
2798- // 7 ISBNs are present and one is HTML - confirm that it is omitted
2798+ // 7 ISBNs are present - confirm that all are included regardless of type
27992799 let output = generate_test_output ( true , & test_work) ;
28002800 assert ! ( output. contains( r#" <isbn media_type="print">978-1-78839-908-1</isbn>"# ) ) ;
28012801 assert ! ( output. contains( r#" <isbn media_type="print">978-1-7343145-0-2</isbn>"# ) ) ;
28022802 assert ! ( output. contains( r#" <isbn media_type="electronic">978-0-07-063546-3</isbn>"# ) ) ;
28032803 assert ! ( output. contains( r#" <isbn media_type="electronic">978-1-56619-909-4</isbn>"# ) ) ;
28042804 assert ! ( output. contains( r#" <isbn media_type="electronic">978-92-95055-02-5</isbn>"# ) ) ;
28052805 assert ! ( output. contains( r#" <isbn media_type="electronic">978-1-4028-9462-6</isbn>"# ) ) ;
2806- assert ! ( ! output. contains( r#" <isbn media_type="electronic">978-3-16-148410-0</isbn>"# ) ) ;
2806+ assert ! ( output. contains( r#" <isbn media_type="electronic">978-3-16-148410-0</isbn>"# ) ) ;
28072807
28082808 // Change the HTML publication to a different format
28092809 test_work. publications [ 0 ] . publication_type = PublicationType :: MOBI ;
2810- // 7 ISBNs are present and none are HTML - confirm that the last one is omitted
2810+ // 7 ISBNs are present and none are HTML - confirm that all are included regardless of type
28112811 let output = generate_test_output ( true , & test_work) ;
28122812 assert ! ( output. contains( r#" <isbn media_type="electronic">978-3-16-148410-0</isbn>"# ) ) ;
28132813 assert ! ( output. contains( r#" <isbn media_type="print">978-1-78839-908-1</isbn>"# ) ) ;
28142814 assert ! ( output. contains( r#" <isbn media_type="print">978-1-7343145-0-2</isbn>"# ) ) ;
28152815 assert ! ( output. contains( r#" <isbn media_type="electronic">978-0-07-063546-3</isbn>"# ) ) ;
28162816 assert ! ( output. contains( r#" <isbn media_type="electronic">978-1-56619-909-4</isbn>"# ) ) ;
28172817 assert ! ( output. contains( r#" <isbn media_type="electronic">978-92-95055-02-5</isbn>"# ) ) ;
2818- assert ! ( ! output. contains( r#" <isbn media_type="electronic">978-1-4028-9462-6</isbn>"# ) ) ;
2818+ assert ! ( output. contains( r#" <isbn media_type="electronic">978-1-4028-9462-6</isbn>"# ) ) ;
28192819 }
28202820
28212821 #[ test]
0 commit comments