Skip to content

Commit db69240

Browse files
committed
fix: convert from jats for title, titles queries
1 parent cceb497 commit db69240

2 files changed

Lines changed: 86 additions & 69 deletions

File tree

thoth-api/src/graphql/model.rs

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ use thoth_errors::{ThothError, ThothResult};
4040

4141
use super::utils::{Direction, Expression};
4242
use crate::model::LocaleCode;
43+
use crate::model::{convert_from_jats, MarkupFormat};
4344

4445
impl juniper::Context for Context {}
4546

@@ -1467,8 +1468,14 @@ impl QueryRoot {
14671468
}
14681469

14691470
#[graphql(description = "Query a title by its ID")]
1470-
fn title(context: &Context, title_id: Uuid) -> FieldResult<Title> {
1471-
Title::from_id(&context.db, &title_id).map_err(|e| e.into())
1471+
fn title(context: &Context, title_id: Uuid, markup_format: MarkupFormat) -> FieldResult<Title> {
1472+
let mut title = Title::from_id(&context.db, &title_id).map_err(|e| e.into())?;
1473+
title.title = convert_from_jats(&title.title, markup_format)?;
1474+
if let Some(subtitle) = &title.subtitle {
1475+
title.subtitle = Some(convert_from_jats(subtitle, markup_format)?);
1476+
}
1477+
title.full_title = convert_from_jats(&title.full_title, markup_format)?;
1478+
Ok(title)
14721479
}
14731480

14741481
#[graphql(description = "Query titles by work ID")]
@@ -1491,8 +1498,9 @@ impl QueryRoot {
14911498
description = "If set, only shows results with these locale codes"
14921499
)]
14931500
locale_codes: Option<Vec<LocaleCode>>,
1501+
markup_format: MarkupFormat,
14941502
) -> FieldResult<Vec<Title>> {
1495-
Title::all(
1503+
let mut titles = Title::all(
14961504
&context.db,
14971505
limit.unwrap_or_default(),
14981506
offset.unwrap_or_default(),
@@ -1505,7 +1513,16 @@ impl QueryRoot {
15051513
vec![],
15061514
None,
15071515
)
1508-
.map_err(|e| e.into())
1516+
.map_err(FieldError::from)?;
1517+
1518+
for title in &mut titles {
1519+
title.title = convert_from_jats(&title.title, markup_format)?;
1520+
if let Some(subtitle) = &title.subtitle {
1521+
title.subtitle = Some(convert_from_jats(subtitle, markup_format)?);
1522+
}
1523+
title.full_title = convert_from_jats(&title.full_title, markup_format)?;
1524+
}
1525+
Ok(titles)
15091526
}
15101527
}
15111528

@@ -2046,7 +2063,8 @@ impl MutationRoot {
20462063
let (t, s) = extract_title(&data.full_title, &data.markup_format)?;
20472064

20482065
let (title_jats_xml, subtitle_jats_xml) = (
2049-
convert_to_jats(t, "title".to_string())?, convert_to_jats(s, "subtitle".to_string())?,
2066+
convert_to_jats(t, "title".to_string())?,
2067+
convert_to_jats(s, "subtitle".to_string())?,
20502068
);
20512069

20522070
let mut data = data.clone();

thoth-api/src/model/mod.rs

Lines changed: 63 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -610,21 +610,21 @@ pub enum MarkupFormat {
610610
}
611611

612612
// impl MarkupFormat {
613-
// pub fn parse(input: &str) -> ThothResult<Self> {
614-
// // Extract format from input extension
615-
// let format = input
616-
// .split('.')
617-
// .last()
618-
// .ok_or_else(|| ThothError::UnsuportedFileFormatError)?;
619-
620-
// match format.to_lowercase().as_str() {
621-
// "html" | "htm" => Ok(MarkupFormat::Html),
622-
// "md" | "markdown" => Ok(MarkupFormat::Markdown),
623-
// "txt" | "text" => Ok(MarkupFormat::PlainText),
624-
// "xml" => Ok(MarkupFormat::JatsXml),
625-
// _ => Err(ThothError::UnsuportedFileFormatError),
626-
// }
627-
// }
613+
// pub fn parse(input: &str) -> ThothResult<Self> {
614+
// // Extract format from input extension
615+
// let format = input
616+
// .split('.')
617+
// .last()
618+
// .ok_or_else(|| ThothError::UnsuportedFileFormatError)?;
619+
620+
// match format.to_lowercase().as_str() {
621+
// "html" | "htm" => Ok(MarkupFormat::Html),
622+
// "md" | "markdown" => Ok(MarkupFormat::Markdown),
623+
// "txt" | "text" => Ok(MarkupFormat::PlainText),
624+
// "xml" => Ok(MarkupFormat::JatsXml),
625+
// _ => Err(ThothError::UnsuportedFileFormatError),
626+
// }
627+
// }
628628
// }
629629

630630
/// Enum to represent abstract types
@@ -759,10 +759,7 @@ pub fn extract_title(content: &str, format: &MarkupFormat) -> ThothResult<(Strin
759759
let is_title = first_line.chars().all(|c| !c.is_lowercase());
760760

761761
// Check if second line is title case (potential subtitle)
762-
let is_subtitle = second_line
763-
.chars()
764-
.next()
765-
.is_some_and(|c| c.is_uppercase())
762+
let is_subtitle = second_line.chars().next().is_some_and(|c| c.is_uppercase())
766763
&& second_line.chars().any(|c| c.is_lowercase());
767764

768765
let title = if is_title && !first_line.is_empty() {
@@ -804,65 +801,67 @@ pub fn convert_to_jats(content: String, tag_name: String) -> ThothResult<String>
804801

805802
/// Convert from JATS XML to specified format
806803
pub fn convert_from_jats(jats_xml: &str, format: MarkupFormat) -> ThothResult<String> {
807-
// Extract title and subtitle from JATS XML
808-
let title_regex = Regex::new(r"<title>(.*?)</title>").unwrap();
809-
let subtitle_regex = Regex::new(r"<subtitle>(.*?)</subtitle>").unwrap();
810-
811-
let title = title_regex
812-
.captures(jats_xml)
813-
.and_then(|caps| caps.get(1))
814-
.map(|m| m.as_str().trim().to_string())
815-
.unwrap_or_default();
816-
817-
let subtitle = subtitle_regex
818-
.captures(jats_xml)
819-
.and_then(|caps| caps.get(1))
820-
.map(|m| m.as_str().trim().to_string())
821-
.unwrap_or_default();
804+
validate_format(jats_xml, &MarkupFormat::JatsXml)?;
805+
806+
let content_regex =
807+
Regex::new(r"<([^>]+)>(.*?)</\1>").map_err(|_| ThothError::UnsuportedFileFormatError)?;
808+
809+
let mut elements = Vec::new();
810+
for caps in content_regex.captures_iter(jats_xml) {
811+
let tag = caps
812+
.get(1)
813+
.map(|m| m.as_str().to_string())
814+
.unwrap_or_default();
815+
let content = caps
816+
.get(2)
817+
.map(|m| m.as_str().trim().to_string())
818+
.unwrap_or_default();
819+
elements.push((tag, content));
820+
}
822821

823822
match format {
824823
MarkupFormat::Html => {
825824
let mut html = String::new();
826-
if !title.is_empty() {
827-
html.push_str(&format!("<h1>{}</h1>\n", title));
828-
}
829-
if !subtitle.is_empty() {
830-
html.push_str(&format!("<h2>{}</h2>\n", subtitle));
825+
for (tag, content) in elements {
826+
html.push_str(&format!("<{}>{}</{}>\n", tag, content, tag));
831827
}
832828
Ok(html)
833829
}
834830
MarkupFormat::Markdown => {
835831
let mut markdown = String::new();
836-
if !title.is_empty() {
837-
markdown.push_str(&format!("# {}\n", title));
838-
}
839-
if !subtitle.is_empty() {
840-
markdown.push_str(&format!("## {}\n", subtitle));
832+
for (tag, content) in elements {
833+
match tag.as_str() {
834+
"title" => markdown.push_str(&format!("# {}\n", content)),
835+
"subtitle" => markdown.push_str(&format!("## {}\n", content)),
836+
_ => markdown.push_str(&format!("{}\n", content)),
837+
}
841838
}
842839
Ok(markdown)
843840
}
844841
MarkupFormat::PlainText => {
845842
let mut text = String::new();
846-
if !title.is_empty() {
847-
text.push_str(&format!("{}\n", title.to_uppercase()));
848-
}
849-
if !subtitle.is_empty() {
850-
// Convert to title case
851-
let title_case = subtitle
852-
.split_whitespace()
853-
.map(|word| {
854-
let mut chars = word.chars();
855-
match chars.next() {
856-
None => String::new(),
857-
Some(first) => first
858-
.to_uppercase()
859-
.chain(chars.flat_map(|c| c.to_lowercase()))
860-
.collect(),
861-
}
862-
})
863-
.collect::<Vec<String>>()
864-
.join(" ");
865-
text.push_str(&format!("{}\n", title_case));
843+
for (tag, content) in elements {
844+
match tag.as_str() {
845+
"title" => text.push_str(&format!("{}\n", content.to_uppercase())),
846+
"subtitle" => {
847+
let title_case = content
848+
.split_whitespace()
849+
.map(|word| {
850+
let mut chars = word.chars();
851+
match chars.next() {
852+
None => String::new(),
853+
Some(first) => first
854+
.to_uppercase()
855+
.chain(chars.flat_map(|c| c.to_lowercase()))
856+
.collect(),
857+
}
858+
})
859+
.collect::<Vec<String>>()
860+
.join(" ");
861+
text.push_str(&format!("{}\n", title_case));
862+
}
863+
_ => text.push_str(&format!("{}\n", content)),
864+
}
866865
}
867866
Ok(text)
868867
}

0 commit comments

Comments
 (0)