Skip to content

Commit 0bcccd4

Browse files
committed
Use common xml patterns
1 parent cc8d9c6 commit 0bcccd4

3 files changed

Lines changed: 269 additions & 160 deletions

File tree

thoth-export-server/src/record.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -280,9 +280,11 @@ impl MetadataRecord {
280280
marc21xml_thoth.generate(&data)
281281
}
282282
MetadataSpecification::DublinCoreThoth(dublin_core_thoth) => {
283-
dublin_core_thoth.generate(&data)
283+
dublin_core_thoth.generate(&data, None)
284+
}
285+
MetadataSpecification::OpenaireThoth(openaire_thoth) => {
286+
openaire_thoth.generate(&data, None)
284287
}
285-
MetadataSpecification::OpenaireThoth(openaire_thoth) => openaire_thoth.generate(&data),
286288
}
287289
}
288290
}

thoth-export-server/src/xml/dublincore_thoth.rs

Lines changed: 99 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,39 @@
1+
use super::{write_element_block, XmlElementBlock, XmlSpecification};
12
use std::collections::HashSet;
3+
use std::io::Write;
24

3-
use quick_xml::escape::escape;
45
use thoth_api::markup::{convert_from_jats, ConversionLimit, MarkupFormat};
56
use thoth_client::{
67
AbstractType, ContributionType, LanguageRelation, PublicationType, SubjectType, Work,
78
WorkAbstracts, WorkContributions, WorkLanguages, WorkTitles,
89
};
910
use thoth_errors::{ThothError, ThothResult};
10-
11-
use crate::record::XML_DECLARATION;
11+
use xml::writer::events::StartElementBuilder;
12+
use xml::writer::{EmitterConfig, EventWriter, XmlEvent};
1213

1314
const DUBLIN_CORE_ERROR: &str = "dublin_core::thoth";
1415
const BY_WORK_ONLY_MESSAGE: &str = "Output can only be generated for one work at a time";
16+
// Attribute (name, value) pairs for the `oai_dc:dc` root element: the three
// namespace declarations plus `xsi:schemaLocation`. Passed as the `attrs`
// argument of `push_open_tag` when the root element is opened.
const DUBLIN_CORE_NS: &[(&str, &str)] = &[
17+
("xmlns:oai_dc", "http://www.openarchives.org/OAI/2.0/oai_dc/"),
18+
("xmlns:dc", "http://purl.org/dc/elements/1.1/"),
19+
("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance"),
20+
(
21+
"xsi:schemaLocation",
22+
"http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd",
23+
),
24+
];
1525

1626
#[derive(Copy, Clone)]
1727
pub(crate) struct DublinCoreThoth;
1828

19-
impl DublinCoreThoth {
20-
pub(crate) fn generate(&self, works: &[Work]) -> ThothResult<String> {
29+
impl XmlSpecification for DublinCoreThoth {
30+
fn handle_event<W: Write>(w: &mut EventWriter<W>, works: &[Work]) -> ThothResult<()> {
2131
match works {
2232
[] => Err(ThothError::IncompleteMetadataRecord(
2333
DUBLIN_CORE_ERROR.to_string(),
2434
"Not enough data".to_string(),
2535
)),
26-
[work] => Ok(format!("{XML_DECLARATION}\n{}", map_dublin_core(work)?)),
36+
[work] => XmlElementBlock::<DublinCoreThoth>::xml_element(work, w),
2737
_ => Err(ThothError::IncompleteMetadataRecord(
2838
DUBLIN_CORE_ERROR.to_string(),
2939
BY_WORK_ONLY_MESSAGE.to_string(),
@@ -32,18 +42,34 @@ impl DublinCoreThoth {
3242
}
3343
}
3444

35-
fn xml_escape(value: &str) -> String {
36-
escape(value).into_owned()
45+
// Dublin Core serialisation of a single `Work`. The trait method is a thin
// adapter: it forwards `self` and the writer to `write_dublin_core`.
impl XmlElementBlock<DublinCoreThoth> for Work {
46+
fn xml_element<W: Write>(&self, w: &mut EventWriter<W>) -> ThothResult<()> {
47+
write_dublin_core(self, w)
48+
}
49+
}
50+
51+
/// Emits `text` as character data through `write_element_block`, using `name`
/// as the element name.
///
/// `write_element_block` comes from the parent module and is not visible here;
/// presumably it writes the start/end events around the closure — TODO confirm.
/// Any writer error from the `Characters` event is converted with `into()` and
/// returned.
fn push_text_element<W: Write>(xml: &mut EventWriter<W>, name: &str, text: &str) -> ThothResult<()> {
52+
write_element_block(name, xml, |xml| {
53+
// NOTE(review): `text` is passed through as-is; escaping is left to the
// event writer rather than done here — confirm against the xml-rs docs.
xml.write(XmlEvent::Characters(text)).map_err(|e| e.into())
54+
})
3755
}
3856

39-
fn push_text_element(xml: &mut String, name: &str, text: &str) {
40-
xml.push('<');
41-
xml.push_str(name);
42-
xml.push('>');
43-
xml.push_str(&xml_escape(text));
44-
xml.push_str("</");
45-
xml.push_str(name);
46-
xml.push('>');
57+
/// Writes a start-element event for `name` carrying every `(key, value)` pair
/// in `attrs` as an attribute, in slice order.
///
/// The element is left open; a matching end-element event must be written
/// separately. Writer errors are converted with `into()` and returned.
fn push_open_tag<W: Write>(
58+
xml: &mut EventWriter<W>,
59+
name: &str,
60+
attrs: &[(&str, &str)],
61+
) -> ThothResult<()> {
62+
let mut event_builder: StartElementBuilder = XmlEvent::start_element(name);
63+
// `attr` consumes the builder and returns it, hence the re-assignment.
for &(key, value) in attrs {
64+
event_builder = event_builder.attr(key, value);
65+
}
66+
let event: XmlEvent = event_builder.into();
67+
xml.write(event).map_err(|e| e.into())
68+
}
69+
70+
/// Writes an end-element event to `xml`.
///
/// `_name` is not used: the event is built with `XmlEvent::end_element()`
/// without a name, so which element gets closed is decided by the writer
/// (presumably the most recently opened one — confirm against the xml-rs docs).
/// Writer errors are converted with `into()` and returned.
fn push_close_tag<W: Write>(xml: &mut EventWriter<W>, _name: &str) -> ThothResult<()> {
71+
let event: XmlEvent = XmlEvent::end_element().into();
72+
xml.write(event).map_err(|e| e.into())
4773
}
4874

4975
fn normalize_value(value: &str) -> Option<String> {
@@ -198,22 +224,20 @@ fn normalized_license_name(license: &str) -> &str {
198224
}
199225
}
200226

201-
fn map_dublin_core(work: &Work) -> ThothResult<String> {
202-
let mut xml = String::from(
203-
r#"<oai_dc:dc xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd">"#,
204-
);
227+
fn write_dublin_core<W: Write>(work: &Work, xml: &mut EventWriter<W>) -> ThothResult<()> {
228+
push_open_tag(xml, "oai_dc:dc", DUBLIN_CORE_NS)?;
205229

206230
let mut title_values = Vec::new();
207231
let mut title_seen = HashSet::new();
208232
for title in ordered_titles(work) {
209233
push_unique(&mut title_values, &mut title_seen, title.full_title.clone());
210234
}
211235
for title in title_values {
212-
push_text_element(&mut xml, "dc:title", &title);
236+
push_text_element(xml, "dc:title", &title)?;
213237
}
214238

215239
for creator in creators(work) {
216-
push_text_element(&mut xml, "dc:creator", &creator.full_name);
240+
push_text_element(xml, "dc:creator", &creator.full_name)?;
217241
}
218242

219243
let mut subject_values = Vec::new();
@@ -227,7 +251,7 @@ fn map_dublin_core(work: &Work) -> ThothResult<String> {
227251
push_unique(&mut subject_values, &mut subject_seen, value);
228252
}
229253
for subject in subject_values {
230-
push_text_element(&mut xml, "dc:subject", &subject);
254+
push_text_element(xml, "dc:subject", &subject)?;
231255
}
232256

233257
let mut description_values = Vec::new();
@@ -275,24 +299,20 @@ fn map_dublin_core(work: &Work) -> ThothResult<String> {
275299
);
276300
}
277301
for description in description_values {
278-
push_text_element(&mut xml, "dc:description", &description);
302+
push_text_element(xml, "dc:description", &description)?;
279303
}
280304

281-
push_text_element(
282-
&mut xml,
283-
"dc:publisher",
284-
&work.imprint.publisher.publisher_name,
285-
);
305+
push_text_element(xml, "dc:publisher", &work.imprint.publisher.publisher_name)?;
286306

287307
for contributor in contributors(work) {
288-
push_text_element(&mut xml, "dc:contributor", &contributor.full_name);
308+
push_text_element(xml, "dc:contributor", &contributor.full_name)?;
289309
}
290310

291311
if let Some(publication_date) = &work.publication_date {
292-
push_text_element(&mut xml, "dc:date", &publication_date.to_string());
312+
push_text_element(xml, "dc:date", &publication_date.to_string())?;
293313
}
294314

295-
push_text_element(&mut xml, "dc:type", dc_type(work));
315+
push_text_element(xml, "dc:type", dc_type(work))?;
296316

297317
let mut format_values = Vec::new();
298318
let mut format_seen = HashSet::new();
@@ -304,7 +324,7 @@ fn map_dublin_core(work: &Work) -> ThothResult<String> {
304324
);
305325
}
306326
for format_value in format_values {
307-
push_text_element(&mut xml, "dc:format", &format_value);
327+
push_text_element(xml, "dc:format", &format_value)?;
308328
}
309329

310330
let mut identifier_values = Vec::new();
@@ -344,7 +364,7 @@ fn map_dublin_core(work: &Work) -> ThothResult<String> {
344364
);
345365
}
346366
for identifier in identifier_values {
347-
push_text_element(&mut xml, "dc:identifier", &identifier);
367+
push_text_element(xml, "dc:identifier", &identifier)?;
348368
}
349369

350370
let mut language_values = Vec::new();
@@ -357,7 +377,7 @@ fn map_dublin_core(work: &Work) -> ThothResult<String> {
357377
);
358378
}
359379
for language in language_values {
360-
push_text_element(&mut xml, "dc:language", &language);
380+
push_text_element(xml, "dc:language", &language)?;
361381
}
362382

363383
let mut relation_values = Vec::new();
@@ -384,7 +404,7 @@ fn map_dublin_core(work: &Work) -> ThothResult<String> {
384404
}
385405
}
386406
for relation in relation_values {
387-
push_text_element(&mut xml, "dc:relation", &relation);
407+
push_text_element(xml, "dc:relation", &relation)?;
388408
}
389409

390410
let mut rights_values = Vec::new();
@@ -405,11 +425,23 @@ fn map_dublin_core(work: &Work) -> ThothResult<String> {
405425
);
406426
}
407427
for rights in rights_values {
408-
push_text_element(&mut xml, "dc:rights", &rights);
428+
push_text_element(xml, "dc:rights", &rights)?;
409429
}
410430

411-
xml.push_str("</oai_dc:dc>");
412-
Ok(xml)
431+
push_close_tag(xml, "oai_dc:dc")
432+
}
433+
434+
/// Renders `work` into an in-memory buffer and returns the result as a `String`.
///
/// The writer is created with `perform_indent(true)`. Errors from
/// `xml_element` are returned unchanged; if the buffer is not valid UTF-8 the
/// function returns `ThothError::InternalError("Could not parse XML")`.
fn map_dublin_core(work: &Work) -> ThothResult<String> {
435+
let mut buffer = Vec::new();
436+
let mut writer = EmitterConfig::new()
437+
.perform_indent(true)
438+
.create_writer(&mut buffer);
439+
XmlElementBlock::<DublinCoreThoth>::xml_element(work, &mut writer)
440+
// Discard the unit result and hand the filled buffer to the next step.
.map(|_| buffer)
441+
.and_then(|xml| {
442+
// The underlying `FromUtf8Error` is dropped; only a fixed message is kept.
String::from_utf8(xml)
443+
.map_err(|_| ThothError::InternalError("Could not parse XML".to_string()))
444+
})
413445
}
414446

415447
#[cfg(test)]
@@ -830,6 +862,20 @@ pub(crate) mod test_support {
830862
mod tests {
831863
use super::test_support::{assert_valid_against_schema, fixture_work};
832864
use super::*;
865+
use crate::record::XML_DECLARATION;
866+
867+
/// Test helper: asserts that the first occurrence of `first` in `xml` starts
/// before the first occurrence of `second`.
///
/// Panics with a "Could not find …" message if either substring is absent, and
/// with an "Expected … to appear before …" message if the order is wrong.
fn assert_precedes(xml: &str, first: &str, second: &str) {
868+
let first_pos = xml
869+
.find(first)
870+
.unwrap_or_else(|| panic!("Could not find `{first}` in XML output"));
871+
let second_pos = xml
872+
.find(second)
873+
.unwrap_or_else(|| panic!("Could not find `{second}` in XML output"));
874+
// `str::find` yields byte offsets of the first match, so this compares
// where each snippet first appears.
assert!(
875+
first_pos < second_pos,
876+
"Expected `{first}` to appear before `{second}`"
877+
);
878+
}
833879

834880
#[test]
835881
fn xml_publication_type_maps_to_text_xml() {
@@ -861,23 +907,35 @@ mod tests {
861907
assert!(xml.contains("<dc:rights>CC BY 4.0</dc:rights>"));
862908
assert!(xml.contains("<dc:rights>Copyright holder: Example Author</dc:rights>"));
863909
assert!(!xml.contains("<dc:coverage>"));
910+
assert_precedes(
911+
&xml,
912+
"<dc:title>Canonical Title: A Story</dc:title>",
913+
"<dc:title>Alternativer Titel</dc:title>",
914+
);
915+
assert_precedes(&xml, "<dc:type>book</dc:type>", "<dc:format>application/pdf</dc:format>");
916+
assert_precedes(
917+
&xml,
918+
"<dc:rights>http://creativecommons.org/licenses/by/4.0/</dc:rights>",
919+
"<dc:rights>CC BY 4.0</dc:rights>",
920+
);
864921

865922
assert_valid_against_schema(&xml, "oai_dc.xsd");
866923
}
867924

868925
#[test]
869926
fn generator_returns_single_work_xml_with_declaration() {
870927
let xml = DublinCoreThoth {}
871-
.generate(&[fixture_work()])
928+
.generate(&[fixture_work()], None)
872929
.expect("single dublin core");
873930
assert!(xml.starts_with(XML_DECLARATION));
931+
assert!(!xml.starts_with(&format!("{XML_DECLARATION}\n")));
874932
assert!(xml.contains("<oai_dc:dc "));
875933
}
876934

877935
#[test]
878936
fn generator_rejects_multiple_works() {
879937
let work = fixture_work();
880-
let result = DublinCoreThoth {}.generate(&[work.clone(), work]);
938+
let result = DublinCoreThoth {}.generate(&[work.clone(), work], None);
881939
assert!(matches!(
882940
result,
883941
Err(ThothError::IncompleteMetadataRecord(spec, message))

0 commit comments

Comments
 (0)