1+ use super :: { write_element_block, XmlElementBlock , XmlSpecification } ;
12use std:: collections:: HashSet ;
3+ use std:: io:: Write ;
24
3- use quick_xml:: escape:: escape;
45use thoth_api:: markup:: { convert_from_jats, ConversionLimit , MarkupFormat } ;
56use thoth_client:: {
67 AbstractType , ContributionType , LanguageRelation , PublicationType , SubjectType , Work ,
78 WorkAbstracts , WorkContributions , WorkLanguages , WorkTitles ,
89} ;
910use thoth_errors:: { ThothError , ThothResult } ;
10-
11- use crate :: record :: XML_DECLARATION ;
11+ use xml :: writer :: events :: StartElementBuilder ;
12+ use xml :: writer :: { EmitterConfig , EventWriter , XmlEvent } ;
1213
/// Specification identifier attached to the errors raised by this module.
const DUBLIN_CORE_ERROR: &str = "dublin_core::thoth";

/// Error message used when more than one work is supplied.
const BY_WORK_ONLY_MESSAGE: &str = "Output can only be generated for one work at a time";

/// Attributes written on the root `oai_dc:dc` element, in output order:
/// the `oai_dc`, `dc` and `xsi` namespace declarations followed by the
/// schema location of the OAI Dublin Core schema.
const DUBLIN_CORE_NS: &[(&str, &str)] = &[
    ("xmlns:oai_dc", "http://www.openarchives.org/OAI/2.0/oai_dc/"),
    ("xmlns:dc", "http://purl.org/dc/elements/1.1/"),
    ("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance"),
    (
        "xsi:schemaLocation",
        "http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd",
    ),
];
1525
/// Marker type selecting the Thoth Dublin Core (`oai_dc`) output specification.
///
/// The type carries no data; it only exists so that the XML traits can be
/// implemented for this particular output format.
#[derive(Copy, Clone)]
pub(crate) struct DublinCoreThoth;
1828
19- impl DublinCoreThoth {
20- pub ( crate ) fn generate ( & self , works : & [ Work ] ) -> ThothResult < String > {
29+ impl XmlSpecification for DublinCoreThoth {
30+ fn handle_event < W : Write > ( w : & mut EventWriter < W > , works : & [ Work ] ) -> ThothResult < ( ) > {
2131 match works {
2232 [ ] => Err ( ThothError :: IncompleteMetadataRecord (
2333 DUBLIN_CORE_ERROR . to_string ( ) ,
2434 "Not enough data" . to_string ( ) ,
2535 ) ) ,
26- [ work] => Ok ( format ! ( "{XML_DECLARATION} \n {}" , map_dublin_core ( work ) ? ) ) ,
36+ [ work] => XmlElementBlock :: < DublinCoreThoth > :: xml_element ( work , w ) ,
2737 _ => Err ( ThothError :: IncompleteMetadataRecord (
2838 DUBLIN_CORE_ERROR . to_string ( ) ,
2939 BY_WORK_ONLY_MESSAGE . to_string ( ) ,
@@ -32,18 +42,34 @@ impl DublinCoreThoth {
3242 }
3343}
3444
35- fn xml_escape ( value : & str ) -> String {
36- escape ( value) . into_owned ( )
45+ impl XmlElementBlock < DublinCoreThoth > for Work {
46+ fn xml_element < W : Write > ( & self , w : & mut EventWriter < W > ) -> ThothResult < ( ) > {
47+ write_dublin_core ( self , w)
48+ }
49+ }
50+
51+ fn push_text_element < W : Write > ( xml : & mut EventWriter < W > , name : & str , text : & str ) -> ThothResult < ( ) > {
52+ write_element_block ( name, xml, |xml| {
53+ xml. write ( XmlEvent :: Characters ( text) ) . map_err ( |e| e. into ( ) )
54+ } )
3755}
3856
39- fn push_text_element ( xml : & mut String , name : & str , text : & str ) {
40- xml. push ( '<' ) ;
41- xml. push_str ( name) ;
42- xml. push ( '>' ) ;
43- xml. push_str ( & xml_escape ( text) ) ;
44- xml. push_str ( "</" ) ;
45- xml. push_str ( name) ;
46- xml. push ( '>' ) ;
57+ fn push_open_tag < W : Write > (
58+ xml : & mut EventWriter < W > ,
59+ name : & str ,
60+ attrs : & [ ( & str , & str ) ] ,
61+ ) -> ThothResult < ( ) > {
62+ let mut event_builder: StartElementBuilder = XmlEvent :: start_element ( name) ;
63+ for & ( key, value) in attrs {
64+ event_builder = event_builder. attr ( key, value) ;
65+ }
66+ let event: XmlEvent = event_builder. into ( ) ;
67+ xml. write ( event) . map_err ( |e| e. into ( ) )
68+ }
69+
70+ fn push_close_tag < W : Write > ( xml : & mut EventWriter < W > , _name : & str ) -> ThothResult < ( ) > {
71+ let event: XmlEvent = XmlEvent :: end_element ( ) . into ( ) ;
72+ xml. write ( event) . map_err ( |e| e. into ( ) )
4773}
4874
4975fn normalize_value ( value : & str ) -> Option < String > {
@@ -198,22 +224,20 @@ fn normalized_license_name(license: &str) -> &str {
198224 }
199225}
200226
201- fn map_dublin_core ( work : & Work ) -> ThothResult < String > {
202- let mut xml = String :: from (
203- r#"<oai_dc:dc xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd">"# ,
204- ) ;
227+ fn write_dublin_core < W : Write > ( work : & Work , xml : & mut EventWriter < W > ) -> ThothResult < ( ) > {
228+ push_open_tag ( xml, "oai_dc:dc" , DUBLIN_CORE_NS ) ?;
205229
206230 let mut title_values = Vec :: new ( ) ;
207231 let mut title_seen = HashSet :: new ( ) ;
208232 for title in ordered_titles ( work) {
209233 push_unique ( & mut title_values, & mut title_seen, title. full_title . clone ( ) ) ;
210234 }
211235 for title in title_values {
212- push_text_element ( & mut xml, "dc:title" , & title) ;
236+ push_text_element ( xml, "dc:title" , & title) ? ;
213237 }
214238
215239 for creator in creators ( work) {
216- push_text_element ( & mut xml, "dc:creator" , & creator. full_name ) ;
240+ push_text_element ( xml, "dc:creator" , & creator. full_name ) ? ;
217241 }
218242
219243 let mut subject_values = Vec :: new ( ) ;
@@ -227,7 +251,7 @@ fn map_dublin_core(work: &Work) -> ThothResult<String> {
227251 push_unique ( & mut subject_values, & mut subject_seen, value) ;
228252 }
229253 for subject in subject_values {
230- push_text_element ( & mut xml, "dc:subject" , & subject) ;
254+ push_text_element ( xml, "dc:subject" , & subject) ? ;
231255 }
232256
233257 let mut description_values = Vec :: new ( ) ;
@@ -275,24 +299,20 @@ fn map_dublin_core(work: &Work) -> ThothResult<String> {
275299 ) ;
276300 }
277301 for description in description_values {
278- push_text_element ( & mut xml, "dc:description" , & description) ;
302+ push_text_element ( xml, "dc:description" , & description) ? ;
279303 }
280304
281- push_text_element (
282- & mut xml,
283- "dc:publisher" ,
284- & work. imprint . publisher . publisher_name ,
285- ) ;
305+ push_text_element ( xml, "dc:publisher" , & work. imprint . publisher . publisher_name ) ?;
286306
287307 for contributor in contributors ( work) {
288- push_text_element ( & mut xml, "dc:contributor" , & contributor. full_name ) ;
308+ push_text_element ( xml, "dc:contributor" , & contributor. full_name ) ? ;
289309 }
290310
291311 if let Some ( publication_date) = & work. publication_date {
292- push_text_element ( & mut xml, "dc:date" , & publication_date. to_string ( ) ) ;
312+ push_text_element ( xml, "dc:date" , & publication_date. to_string ( ) ) ? ;
293313 }
294314
295- push_text_element ( & mut xml, "dc:type" , dc_type ( work) ) ;
315+ push_text_element ( xml, "dc:type" , dc_type ( work) ) ? ;
296316
297317 let mut format_values = Vec :: new ( ) ;
298318 let mut format_seen = HashSet :: new ( ) ;
@@ -304,7 +324,7 @@ fn map_dublin_core(work: &Work) -> ThothResult<String> {
304324 ) ;
305325 }
306326 for format_value in format_values {
307- push_text_element ( & mut xml, "dc:format" , & format_value) ;
327+ push_text_element ( xml, "dc:format" , & format_value) ? ;
308328 }
309329
310330 let mut identifier_values = Vec :: new ( ) ;
@@ -344,7 +364,7 @@ fn map_dublin_core(work: &Work) -> ThothResult<String> {
344364 ) ;
345365 }
346366 for identifier in identifier_values {
347- push_text_element ( & mut xml, "dc:identifier" , & identifier) ;
367+ push_text_element ( xml, "dc:identifier" , & identifier) ? ;
348368 }
349369
350370 let mut language_values = Vec :: new ( ) ;
@@ -357,7 +377,7 @@ fn map_dublin_core(work: &Work) -> ThothResult<String> {
357377 ) ;
358378 }
359379 for language in language_values {
360- push_text_element ( & mut xml, "dc:language" , & language) ;
380+ push_text_element ( xml, "dc:language" , & language) ? ;
361381 }
362382
363383 let mut relation_values = Vec :: new ( ) ;
@@ -384,7 +404,7 @@ fn map_dublin_core(work: &Work) -> ThothResult<String> {
384404 }
385405 }
386406 for relation in relation_values {
387- push_text_element ( & mut xml, "dc:relation" , & relation) ;
407+ push_text_element ( xml, "dc:relation" , & relation) ? ;
388408 }
389409
390410 let mut rights_values = Vec :: new ( ) ;
@@ -405,11 +425,23 @@ fn map_dublin_core(work: &Work) -> ThothResult<String> {
405425 ) ;
406426 }
407427 for rights in rights_values {
408- push_text_element ( & mut xml, "dc:rights" , & rights) ;
428+ push_text_element ( xml, "dc:rights" , & rights) ? ;
409429 }
410430
411- xml. push_str ( "</oai_dc:dc>" ) ;
412- Ok ( xml)
431+ push_close_tag ( xml, "oai_dc:dc" )
432+ }
433+
434+ fn map_dublin_core ( work : & Work ) -> ThothResult < String > {
435+ let mut buffer = Vec :: new ( ) ;
436+ let mut writer = EmitterConfig :: new ( )
437+ . perform_indent ( true )
438+ . create_writer ( & mut buffer) ;
439+ XmlElementBlock :: < DublinCoreThoth > :: xml_element ( work, & mut writer)
440+ . map ( |_| buffer)
441+ . and_then ( |xml| {
442+ String :: from_utf8 ( xml)
443+ . map_err ( |_| ThothError :: InternalError ( "Could not parse XML" . to_string ( ) ) )
444+ } )
413445}
414446
415447#[ cfg( test) ]
@@ -830,6 +862,20 @@ pub(crate) mod test_support {
830862mod tests {
831863 use super :: test_support:: { assert_valid_against_schema, fixture_work} ;
832864 use super :: * ;
865+ use crate :: record:: XML_DECLARATION ;
866+
/// Test helper: asserts that `first` occurs in `xml` before `second`.
///
/// Only the first occurrence of each fragment is considered.
///
/// # Panics
///
/// Panics when either fragment is missing from `xml` (checking `first`
/// before `second`), or when the first occurrence of `first` does not start
/// strictly before the first occurrence of `second`.
fn assert_precedes(xml: &str, first: &str, second: &str) {
    let first_pos = match xml.find(first) {
        Some(offset) => offset,
        None => panic!("Could not find `{first}` in XML output"),
    };
    let second_pos = match xml.find(second) {
        Some(offset) => offset,
        None => panic!("Could not find `{second}` in XML output"),
    };
    assert!(
        first_pos < second_pos,
        "Expected `{first}` to appear before `{second}`"
    );
}
833879
834880 #[ test]
835881 fn xml_publication_type_maps_to_text_xml ( ) {
@@ -861,23 +907,35 @@ mod tests {
861907 assert ! ( xml. contains( "<dc:rights>CC BY 4.0</dc:rights>" ) ) ;
862908 assert ! ( xml. contains( "<dc:rights>Copyright holder: Example Author</dc:rights>" ) ) ;
863909 assert ! ( !xml. contains( "<dc:coverage>" ) ) ;
910+ assert_precedes (
911+ & xml,
912+ "<dc:title>Canonical Title: A Story</dc:title>" ,
913+ "<dc:title>Alternativer Titel</dc:title>" ,
914+ ) ;
915+ assert_precedes ( & xml, "<dc:type>book</dc:type>" , "<dc:format>application/pdf</dc:format>" ) ;
916+ assert_precedes (
917+ & xml,
918+ "<dc:rights>http://creativecommons.org/licenses/by/4.0/</dc:rights>" ,
919+ "<dc:rights>CC BY 4.0</dc:rights>" ,
920+ ) ;
864921
865922 assert_valid_against_schema ( & xml, "oai_dc.xsd" ) ;
866923 }
867924
868925 #[ test]
869926 fn generator_returns_single_work_xml_with_declaration ( ) {
870927 let xml = DublinCoreThoth { }
871- . generate ( & [ fixture_work ( ) ] )
928+ . generate ( & [ fixture_work ( ) ] , None )
872929 . expect ( "single dublin core" ) ;
873930 assert ! ( xml. starts_with( XML_DECLARATION ) ) ;
931+ assert ! ( !xml. starts_with( & format!( "{XML_DECLARATION}\n " ) ) ) ;
874932 assert ! ( xml. contains( "<oai_dc:dc " ) ) ;
875933 }
876934
877935 #[ test]
878936 fn generator_rejects_multiple_works ( ) {
879937 let work = fixture_work ( ) ;
880- let result = DublinCoreThoth { } . generate ( & [ work. clone ( ) , work] ) ;
938+ let result = DublinCoreThoth { } . generate ( & [ work. clone ( ) , work] , None ) ;
881939 assert ! ( matches!(
882940 result,
883941 Err ( ThothError :: IncompleteMetadataRecord ( spec, message) )
0 commit comments