diff --git a/docx-core/examples/render.rs b/docx-core/examples/render.rs new file mode 100644 index 000000000..2129819fe --- /dev/null +++ b/docx-core/examples/render.rs @@ -0,0 +1,14 @@ +use std::collections::HashMap; + +fn main() { + let path = r"C:\Users\33028\Desktop\test_render.docx"; + let mut docx = docx_rs::Docx::read_file(path).unwrap(); + let dictionary = HashMap::from([ + ("test1".to_string(), "岳卓".to_string()), + ("test2".to_string(), "test2_value".to_string()), + ("test3".to_string(), "test3_value".to_string()), + ]); + docx.render(&dictionary); + let file = std::fs::File::create(r"C:\Users\33028\Desktop\test_render1.docx").unwrap(); + docx.build().pack(file).unwrap(); +} diff --git a/docx-core/examples/to_plain_text.rs b/docx-core/examples/to_plain_text.rs new file mode 100644 index 000000000..7b394ad52 --- /dev/null +++ b/docx-core/examples/to_plain_text.rs @@ -0,0 +1,9 @@ +use docx_rs::Docx; + +fn main() { + let path = + std::path::Path::new(r"C:\Users\33028\Desktop\全市部分县区统计执法检查报告 (1).docx"); + let docx = Docx::read_file(&path).unwrap(); + let plain_text = docx.to_plain_text(); + println!("{}", plain_text); +} diff --git a/docx-core/src/documents/document.rs b/docx-core/src/documents/document.rs index 32162ca97..6b21084ef 100644 --- a/docx-core/src/documents/document.rs +++ b/docx-core/src/documents/document.rs @@ -98,6 +98,48 @@ impl Document { Default::default() } + pub fn to_plain_text(&self) -> String { + let mut text = String::new(); + for child in &self.children { + match child { + DocumentChild::Paragraph(p) => { + text.push_str(&p.to_plain_text()); + } + DocumentChild::Table(t) => { + text.push_str(&t.to_plain_text()); + } + DocumentChild::BookmarkStart(_) => {} + DocumentChild::BookmarkEnd(_) => {} + DocumentChild::CommentStart(_) => {} + DocumentChild::CommentEnd(_) => {} + DocumentChild::StructuredDataTag(_) => {} + DocumentChild::TableOfContents(_) => {} + } + } + text + } + + pub fn get_vars(&self) -> Vec { + let mut vars = Vec::new(); + for c in self.children.iter() { + if let DocumentChild::Paragraph(p) = c { + vars.extend(p.get_vars()) + } + } + vars + } + + pub fn render(&mut self, dictionary: &HashMap) { + for c in self.children.iter_mut() { + if let DocumentChild::Paragraph(p) = c { + p.render(dictionary); + } + if let DocumentChild::Table(t) = c { + t.render(dictionary); + } + } + } + pub fn add_paragraph(mut self, p: Paragraph) -> Self { if p.has_numbering { self.has_numbering = true diff --git a/docx-core/src/documents/elements/indent.rs b/docx-core/src/documents/elements/indent.rs index 6f12fde30..63cdebd9a 100644 --- a/docx-core/src/documents/elements/indent.rs +++ b/docx-core/src/documents/elements/indent.rs @@ -58,6 +58,8 @@ impl BuildXML for Indent { self.special_indent, self.end.unwrap_or_default(), self.start_chars, + self.first_line_chars, + self.hanging_chars, ) .build() } diff --git a/docx-core/src/documents/elements/paragraph.rs b/docx-core/src/documents/elements/paragraph.rs index 69509165d..6fc747e89 100644 --- a/docx-core/src/documents/elements/paragraph.rs +++ b/docx-core/src/documents/elements/paragraph.rs @@ -1,3 +1,5 @@ +use std::collections::HashMap; + use serde::ser::{SerializeStruct, Serializer}; use serde::Serialize; @@ -124,6 +126,64 @@ impl Paragraph { Default::default() } + pub fn to_plain_text(&self) -> String { + let mut s = "".to_string(); + for c in self.children.iter() { + match c { + ParagraphChild::Run(run) => { + for c in run.children.iter() { + if let RunChild::Text(t) = c { + s.push_str(&t.text); + } + } + } + ParagraphChild::Insert(i) => { + for c in i.children.iter() { + if let InsertChild::Run(r) = c { + for c in r.children.iter() { + if let RunChild::Text(t) = c { + s.push_str(&t.text); + } + } + } + } + } + ParagraphChild::Delete(d) => { + for c in d.children.iter() { + if let DeleteChild::Run(r) = c { + for c in r.children.iter() { + if let RunChild::Text(t) = c { + s.push_str(&t.text); + } + } + } + } + } + _ => {} + } + } + s.push('\n'); + s + } + + pub fn get_vars(&self) -> Vec { + let mut vars = Vec::new(); + for c in self.children.iter() { + if let ParagraphChild::Run(r) = c { + vars.extend(r.get_vars()) + } + } + vars + } + + pub fn render(&mut self, dictionary: &HashMap) { + for c in self.children.iter_mut() { + if let ParagraphChild::Run(r) = c { + r.render(dictionary) + } + } + } + pub fn id(mut self, id: impl Into) -> Self { self.id = id.into(); self diff --git a/docx-core/src/documents/elements/run.rs b/docx-core/src/documents/elements/run.rs index 9827c1b08..cbc3f13ac 100644 --- a/docx-core/src/documents/elements/run.rs +++ b/docx-core/src/documents/elements/run.rs @@ -1,3 +1,5 @@ +use std::collections::HashMap; + use super::*; use serde::ser::{SerializeStruct, Serializer}; use serde::Serialize; @@ -135,6 +137,26 @@ impl Run { } } + pub fn get_vars(&self) -> Vec { + let mut vars = Vec::new(); + + for c in self.children.iter() { + if let RunChild::Text(t) = c { + vars.extend(t.get_vars()) + } + } + + vars + } + + pub fn render(&mut self, dictionary: &HashMap) { + for c in self.children.iter_mut() { + if let RunChild::Text(t) = c { + t.render(dictionary) + } + } + } + pub fn add_text(mut self, text: impl Into) -> Run { self.children .push(RunChild::Text(Text::new(text.into().replace('\n', "")))); diff --git a/docx-core/src/documents/elements/table.rs b/docx-core/src/documents/elements/table.rs index b6dc08471..1835240dc 100644 --- a/docx-core/src/documents/elements/table.rs +++ b/docx-core/src/documents/elements/table.rs @@ -1,3 +1,5 @@ +use std::collections::HashMap; + use serde::ser::{SerializeStruct, Serializer}; use serde::Serialize; @@ -20,6 +22,14 @@ pub enum TableChild { TableRow(TableRow), } +impl TableChild { + pub fn to_plain_text(&self) -> String { + match self { + TableChild::TableRow(v) => v.to_plain_text(), + } + } +} + impl BuildXML for TableChild { fn build(&self) -> Vec { match self { @@ -42,6 +52,31 @@ impl Table { } } + pub fn to_plain_text(&self) -> String { + self.rows + .iter() + .map(|c| c.to_plain_text()) + .collect::>() + .join("/n") + } + + pub fn render(&mut self, dictionary: &HashMap) { + self.rows.iter_mut().for_each(|c| { + let TableChild::TableRow(tr) = c; + for trc in tr.cells.iter_mut() { + let TableRowChild::TableCell(tc) = trc; + for tcc in tc.children.iter_mut() { + match tcc { + TableCellContent::Paragraph(p) => p.render(dictionary), + TableCellContent::Table(t) => t.render(dictionary), + TableCellContent::StructuredDataTag(_) + | TableCellContent::TableOfContents(_) => (), + } + } + } + }); + } + pub fn without_borders(rows: Vec) -> Table { let property = TableProperty::without_borders(); let has_numbering = rows.iter().any(|c| c.has_numbering); diff --git a/docx-core/src/documents/elements/table_cell.rs b/docx-core/src/documents/elements/table_cell.rs index c757112ca..b2498a1ae 100644 --- a/docx-core/src/documents/elements/table_cell.rs +++ b/docx-core/src/documents/elements/table_cell.rs @@ -22,6 +22,16 @@ pub enum TableCellContent { TableOfContents(Box), } +impl TableCellContent { + pub fn to_plain_text(&self) -> String { + match self { + TableCellContent::Paragraph(v) => v.to_plain_text(), + TableCellContent::Table(v) => v.to_plain_text(), + _ => "".to_string(), + } + } +} + impl Serialize for TableCellContent { fn serialize(&self, serializer: S) -> Result where @@ -61,6 +71,14 @@ impl TableCell { Default::default() } + pub fn to_plain_text(&self) -> String { + self.children + .iter() + .map(|c| c.to_plain_text()) + .collect::>() + .join(" ") + } + pub fn add_paragraph(mut self, p: Paragraph) -> TableCell { if p.has_numbering { self.has_numbering = true diff --git a/docx-core/src/documents/elements/table_row.rs b/docx-core/src/documents/elements/table_row.rs index c52c3687f..a5c84a3b6 100644 --- a/docx-core/src/documents/elements/table_row.rs +++ b/docx-core/src/documents/elements/table_row.rs @@ -18,6 +18,14 @@ pub enum TableRowChild { TableCell(TableCell), } +impl TableRowChild { + pub fn to_plain_text(&self) -> String { + match self { + TableRowChild::TableCell(v) => v.to_plain_text(), + } + } +} + impl BuildXML for TableRowChild { fn build(&self) -> Vec { match self { @@ -38,6 +46,14 @@ impl TableRow { } } + pub fn to_plain_text(&self) -> String { + self.cells + .iter() + .map(|c| c.to_plain_text()) + .collect::>() + .join(" ") + } + pub fn grid_after(mut self, grid_after: u32) -> TableRow { self.property = self.property.grid_after(grid_after); self diff --git a/docx-core/src/documents/elements/text.rs b/docx-core/src/documents/elements/text.rs index aab291c27..f7f5c0dff 100644 --- a/docx-core/src/documents/elements/text.rs +++ b/docx-core/src/documents/elements/text.rs @@ -1,5 +1,6 @@ use serde::ser::{Serialize, SerializeStruct, Serializer}; use serde::Deserialize; +use std::collections::HashMap; use crate::documents::BuildXML; use crate::escape::escape; @@ -20,6 +21,45 @@ impl Text { } } + // VAR, e.g. ${VAR} + pub fn get_vars(&self) -> Vec { + let mut vars = Vec::new(); + let mut var = String::new(); + let mut in_var = false; + let mut start = false; + for c in self.text.chars() { + if c == '$' { + in_var = true; + } else if c == '{' { + if in_var { + start = true; + var.clear(); + } + } else if c == '}' { + if start { + vars.push(var.clone()); + start = false; + in_var = false; + } + } else if start { + var.push(c); + } + } + vars + } + + pub fn render(&mut self, dictionary: &HashMap) { + let vars = self.get_vars(); + let vars_replace = vars + .iter() + .map(|s| dictionary.get(s).unwrap_or(&String::new()).clone()) + .collect::>(); + for (var, replace) in vars.iter().zip(vars_replace.iter()) { + self.text = self.text.replace(&format!("${{{}}}", var), replace); + println!("{}", self.text); + } + } + pub(crate) fn without_escape(text: impl Into) -> Text { Text { text: text.into(), diff --git a/docx-core/src/documents/mod.rs b/docx-core/src/documents/mod.rs index 2feefafb3..bb4cfe9f2 100644 --- a/docx-core/src/documents/mod.rs +++ b/docx-core/src/documents/mod.rs @@ -73,6 +73,8 @@ pub use xml_docx::*; use serde::{ser, Serialize}; +use crate::{ReaderError, read_docx}; + #[derive(Debug, Clone)] pub struct Image(pub Vec); @@ -180,6 +182,20 @@ impl Docx { Default::default() } + pub fn read_file>(path: P) -> Result { + let buf = std::fs::read(path)?; + let docx = read_docx(&buf)?; + Ok(docx) + } + + pub fn to_plain_text(&self) -> String { + self.document.to_plain_text() + } + + pub fn render(&mut self, dictionary: &HashMap) { + self.document.render(dictionary); + } + pub fn document(mut self, d: Document) -> Docx { for child in &self.document.children { match child { diff --git a/docx-core/src/reader/errors.rs b/docx-core/src/reader/errors.rs index 141ed314f..a4bb562de 100644 --- a/docx-core/src/reader/errors.rs +++ b/docx-core/src/reader/errors.rs @@ -2,6 +2,10 @@ use thiserror::Error; #[derive(Error, Debug)] pub enum ReaderError { + /// IO errors + #[error("Failed to read file.")] + FileReadError(#[from] std::io::Error), + /// Zip errors #[error("Failed to read from zip.")] ZipError(#[from] zip::result::ZipError), #[error("Failed to parse int.")] diff --git a/docx-core/src/xml_builder/elements.rs b/docx-core/src/xml_builder/elements.rs index 77bfef034..7fba8b029 100644 --- a/docx-core/src/xml_builder/elements.rs +++ b/docx-core/src/xml_builder/elements.rs @@ -214,6 +214,8 @@ impl XMLBuilder { special_indent: Option, end: i32, start_chars: Option, + first_line_chars: Option, + hanging_chars: Option, ) -> Self { let start = &format!("{}", start.unwrap_or(0)); let end = &format!("{}", end); @@ -226,17 +228,32 @@ impl XMLBuilder { base = base.attr("w:leftChars", &start_chars_value); } + let mut value = String::new(); match special_indent { - Some(SpecialIndentType::FirstLine(v)) => self - .writer - .write(base.attr("w:firstLine", &format!("{}", v))) - .expect(EXPECT_MESSAGE), - Some(SpecialIndentType::Hanging(v)) => self - .writer - .write(base.attr("w:hanging", &format!("{}", v))) - .expect(EXPECT_MESSAGE), - _ => self.writer.write(base).expect(EXPECT_MESSAGE), + Some(SpecialIndentType::FirstLine(v)) => { + value = format!("{}", v); + base = base.attr("w:firstLine", &value) + } + Some(SpecialIndentType::Hanging(v)) => { + value = format!("{}", v); + base = base.attr("w:hanging", &value) + } + _ => (), }; + + let mut value1 = String::new(); + if first_line_chars.is_some() { + value1 = format!("{}", first_line_chars.unwrap()); + base = base.attr("w:firstLineChars", &value1); + } + + let mut value2 = String::new(); + if hanging_chars.is_some() { + value2 = format!("{}", hanging_chars.unwrap()); + base = base.attr("w:hangingChars", &value2); + } + + self.writer.write(base).expect(EXPECT_MESSAGE); self.close() } @@ -446,7 +463,11 @@ impl XMLBuilder { closed!(ul_trail_space, "w:ulTrailSpace"); closed!(do_not_expand_shift_return, "w:doNotExpandShiftReturn"); closed!(adjust_line_height_table, "w:adjustLineHeightInTable"); - closed!(character_spacing_control,"w:characterSpacingControl","w:val"); + closed!( + character_spacing_control, + "w:characterSpacingControl", + "w:val" + ); closed!(use_fe_layout, "w:useFELayout"); closed!( compat_setting, @@ -657,4 +678,23 @@ mod tests { r#""# ); } + + #[test] + fn test_indent_first_line_chars() { + let b = XMLBuilder::new(); + let r = b + .indent( + Some(20), + Some(SpecialIndentType::FirstLine(20)), + 20, + Some(20), + Some(20), + Some(20), + ) + .build(); + assert_eq!( + str::from_utf8(&r).unwrap(), + r#""# + ); + } }