@@ -325,16 +325,31 @@ fn write_jats_content<W: Write>(content: &str, w: &mut EventWriter<W>) -> ThothR
325325 let mut event_builder = XmlEvent :: start_element ( & * name) ;
326326
327327 // Add attributes
328- let attrs: Vec < ( String , String ) > = e
328+ let attrs: ThothResult < Vec < ( String , String ) > > = e
329329 . attributes ( )
330- . flatten ( )
331330 . map ( |attr| {
332- (
331+ let attr = attr. map_err ( |err| {
332+ ThothError :: InternalError ( format ! (
333+ "Error parsing JATS content attributes: {}" ,
334+ err
335+ ) )
336+ } ) ?;
337+ let value = attr
338+ . decode_and_unescape_value ( reader. decoder ( ) )
339+ . map_err ( |err| {
340+ ThothError :: InternalError ( format ! (
341+ "Error decoding JATS content attributes: {}" ,
342+ err
343+ ) )
344+ } ) ?
345+ . into_owned ( ) ;
346+ Ok ( (
333347 String :: from_utf8_lossy ( attr. key . as_ref ( ) ) . to_string ( ) ,
334- String :: from_utf8_lossy ( & attr . value ) . to_string ( ) ,
335- )
348+ value,
349+ ) )
336350 } )
337351 . collect ( ) ;
352+ let attrs = attrs?;
338353
339354 for ( key, value) in & attrs {
340355 event_builder = event_builder. attr ( key. as_str ( ) , value. as_str ( ) ) ;
@@ -357,16 +372,31 @@ fn write_jats_content<W: Write>(content: &str, w: &mut EventWriter<W>) -> ThothR
357372 let mut event_builder = XmlEvent :: start_element ( & * name) ;
358373
359374 // Add attributes
360- let attrs: Vec < ( String , String ) > = e
375+ let attrs: ThothResult < Vec < ( String , String ) > > = e
361376 . attributes ( )
362- . flatten ( )
363377 . map ( |attr| {
364- (
378+ let attr = attr. map_err ( |err| {
379+ ThothError :: InternalError ( format ! (
380+ "Error parsing JATS content attributes: {}" ,
381+ err
382+ ) )
383+ } ) ?;
384+ let value = attr
385+ . decode_and_unescape_value ( reader. decoder ( ) )
386+ . map_err ( |err| {
387+ ThothError :: InternalError ( format ! (
388+ "Error decoding JATS content attributes: {}" ,
389+ err
390+ ) )
391+ } ) ?
392+ . into_owned ( ) ;
393+ Ok ( (
365394 String :: from_utf8_lossy ( attr. key . as_ref ( ) ) . to_string ( ) ,
366- String :: from_utf8_lossy ( & attr . value ) . to_string ( ) ,
367- )
395+ value,
396+ ) )
368397 } )
369398 . collect ( ) ;
399+ let attrs = attrs?;
370400
371401 for ( key, value) in & attrs {
372402 event_builder = event_builder. attr ( key. as_str ( ) , value. as_str ( ) ) ;
@@ -2487,7 +2517,7 @@ mod tests {
24872517 // Should not contain any paragraph elements
24882518 assert ! ( !output. contains( r#"<jats:p>"# ) ) ;
24892519
2490- // Nested paragraph wrappers should be flattened before writing .
2520+ // Nested paragraph wrappers are invalid JATS and should be rejected .
24912521 let mut buffer = Vec :: new ( ) ;
24922522 let mut writer = xml:: writer:: EmitterConfig :: new ( )
24932523 . perform_indent ( true )
@@ -2500,13 +2530,9 @@ mod tests {
25002530 & mut writer,
25012531 ) ;
25022532
2503- assert ! ( result. is_ok( ) ) ;
2504- let output = String :: from_utf8 ( buffer) . unwrap ( ) ;
2505- assert ! ( output. contains( r#"<jats:p>Nested paragraph.</jats:p>"# ) ) ;
2506- assert ! ( !output. contains( r#"<jats:p><jats:p>"# ) ) ;
2507- assert ! ( !output. contains( r#"<jats:p />"# ) ) ;
2533+ assert ! ( result. is_err( ) ) ;
25082534
2509- // Break elements should be converted into sibling paragraphs .
2535+ // Break elements are invalid JATS and should be rejected .
25102536 let mut buffer = Vec :: new ( ) ;
25112537 let mut writer = xml:: writer:: EmitterConfig :: new ( )
25122538 . perform_indent ( true )
@@ -2519,11 +2545,7 @@ mod tests {
25192545 & mut writer,
25202546 ) ;
25212547
2522- assert ! ( result. is_ok( ) ) ;
2523- let output = String :: from_utf8 ( buffer) . unwrap ( ) ;
2524- assert ! ( output. contains( r#"<jats:p>First line</jats:p>"# ) ) ;
2525- assert ! ( output. contains( r#"<jats:p>Second line</jats:p>"# ) ) ;
2526- assert ! ( !output. contains( r#"<jats:break"# ) ) ;
2548+ assert ! ( result. is_err( ) ) ;
25272549
25282550 // Locale codes written to xml:lang should use BCP 47 hyphen separators.
25292551 let mut buffer = Vec :: new ( ) ;
@@ -2563,6 +2585,32 @@ mod tests {
25632585 assert ! ( error. contains( "Invalid Crossref abstract markup" ) ) ;
25642586 }
25652587
/// Regression test: attribute values copied from abstract markup into the
/// JATS output must be XML-escaped exactly once.
///
/// The input `xlink:href` value carries the predefined XML entities
/// `&amp;`, `&quot;` and `&apos;`. The test expects the same single-escaped
/// value to appear on the emitted `jats:ext-link` element and checks that no
/// entity has been escaped a second time (e.g. `&amp;amp;`).
///
/// NOTE(review): the entity-encoded literals below were reconstructed; the
/// rendered diff this was taken from showed them HTML-decoded (bare `&`,
/// `"` and `'` inside the attribute), which is not well-formed XML and
/// contradicts the "escaped once" intent — confirm against the repository.
#[test]
fn test_write_abstract_content_with_locale_code_escapes_link_attributes_once() {
    let mut buffer = Vec::new();
    let mut writer = xml::writer::EmitterConfig::new()
        .perform_indent(true)
        .create_writer(&mut buffer);

    // Well-formed input: special characters inside the attribute value are
    // written as entities, as they must be in any parseable XML document.
    let result = write_abstract_content_with_locale_code(
        r#"<p><ext-link xlink:href="https://example.org?a=1&amp;b=2&amp;quote=&quot;hi&quot;&amp;apos=&apos;ok&apos;">link</ext-link></p>"#,
        "long",
        "EN",
        &mut writer,
    );

    assert!(result.is_ok());
    let output = String::from_utf8(buffer).unwrap();

    // The attribute value must round-trip with a single level of escaping.
    assert!(output.contains(
        r#"<jats:ext-link xlink:href="https://example.org?a=1&amp;b=2&amp;quote=&quot;hi&quot;&amp;apos=&apos;ok&apos;">link</jats:ext-link>"#
    ));

    // Double-escaped entities would indicate the raw (still escaped) attribute
    // bytes were handed to the writer and escaped again.
    assert!(!output.contains("&amp;amp;"));
    assert!(!output.contains("&amp;quot;"));
    assert!(!output.contains("&amp;apos;"));
}
2613+
25662614 #[ test]
25672615 // Crossref previously limited the number of ISBNs that could be included in a deposit file to 6,
25682616 // but this has now been increased in schema version 5.4.0 to 100 (which will never become relevant).
0 commit comments