@@ -610,21 +610,21 @@ pub enum MarkupFormat {
610610}
611611
612612// impl MarkupFormat {
613- // pub fn parse(input: &str) -> ThothResult<Self> {
614- // // Extract format from input extension
615- // let format = input
616- // .split('.')
617- // .last()
618- // .ok_or_else(|| ThothError::UnsuportedFileFormatError)?;
619-
620- // match format.to_lowercase().as_str() {
621- // "html" | "htm" => Ok(MarkupFormat::Html),
622- // "md" | "markdown" => Ok(MarkupFormat::Markdown),
623- // "txt" | "text" => Ok(MarkupFormat::PlainText),
624- // "xml" => Ok(MarkupFormat::JatsXml),
625- // _ => Err(ThothError::UnsuportedFileFormatError),
626- // }
627- // }
613+ // pub fn parse(input: &str) -> ThothResult<Self> {
614+ // // Extract format from input extension
615+ // let format = input
616+ // .split('.')
617+ // .last()
618+ // .ok_or_else(|| ThothError::UnsuportedFileFormatError)?;
619+
620+ // match format.to_lowercase().as_str() {
621+ // "html" | "htm" => Ok(MarkupFormat::Html),
622+ // "md" | "markdown" => Ok(MarkupFormat::Markdown),
623+ // "txt" | "text" => Ok(MarkupFormat::PlainText),
624+ // "xml" => Ok(MarkupFormat::JatsXml),
625+ // _ => Err(ThothError::UnsuportedFileFormatError),
626+ // }
627+ // }
628628// }
629629
630630/// Enum to represent abstract types
@@ -759,10 +759,7 @@ pub fn extract_title(content: &str, format: &MarkupFormat) -> ThothResult<(Strin
759759 let is_title = first_line. chars ( ) . all ( |c| !c. is_lowercase ( ) ) ;
760760
761761 // Check if second line is title case (potential subtitle)
762- let is_subtitle = second_line
763- . chars ( )
764- . next ( )
765- . is_some_and ( |c| c. is_uppercase ( ) )
762+ let is_subtitle = second_line. chars ( ) . next ( ) . is_some_and ( |c| c. is_uppercase ( ) )
766763 && second_line. chars ( ) . any ( |c| c. is_lowercase ( ) ) ;
767764
768765 let title = if is_title && !first_line. is_empty ( ) {
@@ -804,65 +801,67 @@ pub fn convert_to_jats(content: String, tag_name: String) -> ThothResult<String>
804801
805802/// Convert from JATS XML to specified format
806803pub fn convert_from_jats ( jats_xml : & str , format : MarkupFormat ) -> ThothResult < String > {
807- // Extract title and subtitle from JATS XML
808- let title_regex = Regex :: new ( r"<title>(.*?)</title>" ) . unwrap ( ) ;
809- let subtitle_regex = Regex :: new ( r"<subtitle>(.*?)</subtitle>" ) . unwrap ( ) ;
810-
811- let title = title_regex
812- . captures ( jats_xml)
813- . and_then ( |caps| caps. get ( 1 ) )
814- . map ( |m| m. as_str ( ) . trim ( ) . to_string ( ) )
815- . unwrap_or_default ( ) ;
816-
817- let subtitle = subtitle_regex
818- . captures ( jats_xml)
819- . and_then ( |caps| caps. get ( 1 ) )
820- . map ( |m| m. as_str ( ) . trim ( ) . to_string ( ) )
821- . unwrap_or_default ( ) ;
804+ validate_format ( jats_xml, & MarkupFormat :: JatsXml ) ?;
805+
806+ let content_regex =
807+ Regex :: new ( r"<([^>]+)>(.*?)</\1>" ) . map_err ( |_| ThothError :: UnsuportedFileFormatError ) ?;
808+
809+ let mut elements = Vec :: new ( ) ;
810+ for caps in content_regex. captures_iter ( jats_xml) {
811+ let tag = caps
812+ . get ( 1 )
813+ . map ( |m| m. as_str ( ) . to_string ( ) )
814+ . unwrap_or_default ( ) ;
815+ let content = caps
816+ . get ( 2 )
817+ . map ( |m| m. as_str ( ) . trim ( ) . to_string ( ) )
818+ . unwrap_or_default ( ) ;
819+ elements. push ( ( tag, content) ) ;
820+ }
822821
823822 match format {
824823 MarkupFormat :: Html => {
825824 let mut html = String :: new ( ) ;
826- if !title. is_empty ( ) {
827- html. push_str ( & format ! ( "<h1>{}</h1>\n " , title) ) ;
828- }
829- if !subtitle. is_empty ( ) {
830- html. push_str ( & format ! ( "<h2>{}</h2>\n " , subtitle) ) ;
825+ for ( tag, content) in elements {
826+ html. push_str ( & format ! ( "<{}>{}</{}>\n " , tag, content, tag) ) ;
831827 }
832828 Ok ( html)
833829 }
834830 MarkupFormat :: Markdown => {
835831 let mut markdown = String :: new ( ) ;
836- if !title. is_empty ( ) {
837- markdown. push_str ( & format ! ( "# {}\n " , title) ) ;
838- }
839- if !subtitle. is_empty ( ) {
840- markdown. push_str ( & format ! ( "## {}\n " , subtitle) ) ;
832+ for ( tag, content) in elements {
833+ match tag. as_str ( ) {
834+ "title" => markdown. push_str ( & format ! ( "# {}\n " , content) ) ,
835+ "subtitle" => markdown. push_str ( & format ! ( "## {}\n " , content) ) ,
836+ _ => markdown. push_str ( & format ! ( "{}\n " , content) ) ,
837+ }
841838 }
842839 Ok ( markdown)
843840 }
844841 MarkupFormat :: PlainText => {
845842 let mut text = String :: new ( ) ;
846- if !title. is_empty ( ) {
847- text. push_str ( & format ! ( "{}\n " , title. to_uppercase( ) ) ) ;
848- }
849- if !subtitle. is_empty ( ) {
850- // Convert to title case
851- let title_case = subtitle
852- . split_whitespace ( )
853- . map ( |word| {
854- let mut chars = word. chars ( ) ;
855- match chars. next ( ) {
856- None => String :: new ( ) ,
857- Some ( first) => first
858- . to_uppercase ( )
859- . chain ( chars. flat_map ( |c| c. to_lowercase ( ) ) )
860- . collect ( ) ,
861- }
862- } )
863- . collect :: < Vec < String > > ( )
864- . join ( " " ) ;
865- text. push_str ( & format ! ( "{}\n " , title_case) ) ;
843+ for ( tag, content) in elements {
844+ match tag. as_str ( ) {
845+ "title" => text. push_str ( & format ! ( "{}\n " , content. to_uppercase( ) ) ) ,
846+ "subtitle" => {
847+ let title_case = content
848+ . split_whitespace ( )
849+ . map ( |word| {
850+ let mut chars = word. chars ( ) ;
851+ match chars. next ( ) {
852+ None => String :: new ( ) ,
853+ Some ( first) => first
854+ . to_uppercase ( )
855+ . chain ( chars. flat_map ( |c| c. to_lowercase ( ) ) )
856+ . collect ( ) ,
857+ }
858+ } )
859+ . collect :: < Vec < String > > ( )
860+ . join ( " " ) ;
861+ text. push_str ( & format ! ( "{}\n " , title_case) ) ;
862+ }
863+ _ => text. push_str ( & format ! ( "{}\n " , content) ) ,
864+ }
866865 }
867866 Ok ( text)
868867 }
0 commit comments