Skip to content

Add firstLineCharsAndHangingChars #669

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions docx-core/examples/render.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
use std::collections::HashMap;

/// Example: reads a .docx file, renders it against a small dictionary and
/// writes the result to a second file.
///
/// Panics if the input cannot be read or the output cannot be created/packed.
fn main() {
    let mut docx = docx_rs::Docx::read_file(r"C:\Users\33028\Desktop\test_render.docx").unwrap();

    // Variable name -> replacement value.
    let entries = [
        ("test1", "岳卓"),
        ("test2", "test2_value"),
        ("test3", "test3_value"),
    ];
    let dictionary: HashMap<String, String> = entries
        .iter()
        .map(|&(key, value)| (key.to_string(), value.to_string()))
        .collect();
    docx.render(&dictionary);

    let output = std::fs::File::create(r"C:\Users\33028\Desktop\test_render1.docx").unwrap();
    docx.build().pack(output).unwrap();
}
9 changes: 9 additions & 0 deletions docx-core/examples/to_plain_text.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
use docx_rs::Docx;

/// Example: reads a .docx file and prints its plain-text content to stdout.
///
/// Panics if the file cannot be read.
fn main() {
    let docx =
        Docx::read_file(r"C:\Users\33028\Desktop\全市部分县区统计执法检查报告 (1).docx").unwrap();
    println!("{}", docx.to_plain_text());
}
42 changes: 42 additions & 0 deletions docx-core/src/documents/document.rs
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,48 @@ impl Document {
Default::default()
}

/// Concatenates the plain text of every top-level paragraph and table, in
/// document order.
///
/// Bookmarks, comment markers, structured data tags and tables of contents
/// contribute nothing to the result.
pub fn to_plain_text(&self) -> String {
    self.children
        .iter()
        .map(|child| match child {
            DocumentChild::Paragraph(p) => p.to_plain_text(),
            DocumentChild::Table(t) => t.to_plain_text(),
            // Listed explicitly so a newly added variant forces a decision here.
            DocumentChild::BookmarkStart(_)
            | DocumentChild::BookmarkEnd(_)
            | DocumentChild::CommentStart(_)
            | DocumentChild::CommentEnd(_)
            | DocumentChild::StructuredDataTag(_)
            | DocumentChild::TableOfContents(_) => String::new(),
        })
        .collect()
}

/// Collects the variable names reported by every top-level paragraph, in
/// document order.
///
/// Only `DocumentChild::Paragraph` children are inspected; all other
/// children (tables included) are skipped.
pub fn get_vars(&self) -> Vec<String> {
    self.children
        .iter()
        .filter_map(|child| match child {
            DocumentChild::Paragraph(p) => Some(p.get_vars()),
            _ => None,
        })
        .flatten()
        .collect()
}

/// Renders every top-level paragraph and table in place against `dictionary`.
///
/// Children of any other kind are left untouched.
pub fn render(&mut self, dictionary: &HashMap<String, String>) {
    for child in &mut self.children {
        match child {
            DocumentChild::Paragraph(p) => p.render(dictionary),
            DocumentChild::Table(t) => t.render(dictionary),
            _ => {}
        }
    }
}

pub fn add_paragraph(mut self, p: Paragraph) -> Self {
if p.has_numbering {
self.has_numbering = true
Expand Down
2 changes: 2 additions & 0 deletions docx-core/src/documents/elements/indent.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ impl BuildXML for Indent {
self.special_indent,
self.end.unwrap_or_default(),
self.start_chars,
self.first_line_chars,
self.hanging_chars,
)
.build()
}
Expand Down
60 changes: 60 additions & 0 deletions docx-core/src/documents/elements/paragraph.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
use std::collections::HashMap;

use serde::ser::{SerializeStruct, Serializer};
use serde::Serialize;

Expand Down Expand Up @@ -124,6 +126,64 @@ impl Paragraph {
Default::default()
}

pub fn to_plain_text(&self) -> String {
let mut s = "".to_string();
for c in self.children.iter() {
match c {
ParagraphChild::Run(run) => {
for c in run.children.iter() {
if let RunChild::Text(t) = c {
s.push_str(&t.text);
}
}
}
ParagraphChild::Insert(i) => {
for c in i.children.iter() {
if let InsertChild::Run(r) = c {
for c in r.children.iter() {
if let RunChild::Text(t) = c {
s.push_str(&t.text);
}
}
}
}
}
ParagraphChild::Delete(d) => {
for c in d.children.iter() {
if let DeleteChild::Run(r) = c {
for c in r.children.iter() {
if let RunChild::Text(t) = c {
s.push_str(&t.text);
}
}
}
}
}
_ => {}
}
}
s.push('\n');
s
}

/// Collects the variable names reported by each direct run of this
/// paragraph, in order.
///
/// Children that are not `ParagraphChild::Run` are skipped.
pub fn get_vars(&self) -> Vec<String> {
    self.children
        .iter()
        .flat_map(|child| match child {
            ParagraphChild::Run(run) => run.get_vars(),
            _ => Vec::new(),
        })
        .collect()
}

/// Renders every direct run of this paragraph in place against `dictionary`.
///
/// Children that are not `ParagraphChild::Run` are left untouched.
pub fn render(&mut self, dictionary: &HashMap<String, String>) {
    for child in &mut self.children {
        match child {
            ParagraphChild::Run(run) => run.render(dictionary),
            _ => {}
        }
    }
}

pub fn id(mut self, id: impl Into<String>) -> Self {
self.id = id.into();
self
Expand Down
22 changes: 22 additions & 0 deletions docx-core/src/documents/elements/run.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
use std::collections::HashMap;

use super::*;
use serde::ser::{SerializeStruct, Serializer};
use serde::Serialize;
Expand Down Expand Up @@ -135,6 +137,26 @@ impl Run {
}
}

/// Collects the variable names reported by each text child of this run,
/// in order.
///
/// Children that are not `RunChild::Text` are skipped.
pub fn get_vars(&self) -> Vec<String> {
    self.children
        .iter()
        .flat_map(|child| match child {
            RunChild::Text(text) => text.get_vars(),
            _ => Vec::new(),
        })
        .collect()
}

/// Renders every text child of this run in place against `dictionary`.
///
/// Children that are not `RunChild::Text` are left untouched.
pub fn render(&mut self, dictionary: &HashMap<String, String>) {
    for child in &mut self.children {
        match child {
            RunChild::Text(text) => text.render(dictionary),
            _ => {}
        }
    }
}

pub fn add_text(mut self, text: impl Into<String>) -> Run {
self.children
.push(RunChild::Text(Text::new(text.into().replace('\n', ""))));
Expand Down
35 changes: 35 additions & 0 deletions docx-core/src/documents/elements/table.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
use std::collections::HashMap;

use serde::ser::{SerializeStruct, Serializer};
use serde::Serialize;

Expand All @@ -20,6 +22,14 @@ pub enum TableChild {
TableRow(TableRow),
}

impl TableChild {
    /// Returns the plain text of the wrapped table row.
    pub fn to_plain_text(&self) -> String {
        // `TableRow` is the only variant, so the pattern is irrefutable.
        let TableChild::TableRow(row) = self;
        row.to_plain_text()
    }
}

impl BuildXML for TableChild {
fn build(&self) -> Vec<u8> {
match self {
Expand All @@ -42,6 +52,31 @@ impl Table {
}
}

/// Returns the plain text of the table: the text of each row, with
/// consecutive rows separated by a newline character.
pub fn to_plain_text(&self) -> String {
    self.rows
        .iter()
        .map(|row| row.to_plain_text())
        .collect::<Vec<_>>()
        // Newline escape; the previous "/n" emitted a literal slash and 'n'.
        .join("\n")
}

/// Renders every paragraph and nested table found in the cells of this
/// table, in place, against `dictionary`.
///
/// Structured data tags and tables of contents inside cells are left
/// untouched.
pub fn render(&mut self, dictionary: &HashMap<String, String>) {
    // Both wrapper enums have a single variant, so the `let` patterns below
    // are irrefutable.
    let cells = self.rows.iter_mut().flat_map(|row| {
        let TableChild::TableRow(tr) = row;
        tr.cells.iter_mut()
    });
    let contents = cells.flat_map(|cell| {
        let TableRowChild::TableCell(tc) = cell;
        tc.children.iter_mut()
    });
    for content in contents {
        match content {
            TableCellContent::Paragraph(p) => p.render(dictionary),
            TableCellContent::Table(t) => t.render(dictionary),
            TableCellContent::StructuredDataTag(_) | TableCellContent::TableOfContents(_) => {}
        }
    }
}

pub fn without_borders(rows: Vec<TableRow>) -> Table {
let property = TableProperty::without_borders();
let has_numbering = rows.iter().any(|c| c.has_numbering);
Expand Down
18 changes: 18 additions & 0 deletions docx-core/src/documents/elements/table_cell.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,16 @@ pub enum TableCellContent {
TableOfContents(Box<TableOfContents>),
}

impl TableCellContent {
pub fn to_plain_text(&self) -> String {
match self {
TableCellContent::Paragraph(v) => v.to_plain_text(),
TableCellContent::Table(v) => v.to_plain_text(),
_ => "".to_string(),
}
}
}

impl Serialize for TableCellContent {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
Expand Down Expand Up @@ -61,6 +71,14 @@ impl TableCell {
Default::default()
}

pub fn to_plain_text(&self) -> String {
self.children
.iter()
.map(|c| c.to_plain_text())
.collect::<Vec<_>>()
.join(" ")
}

pub fn add_paragraph(mut self, p: Paragraph) -> TableCell {
if p.has_numbering {
self.has_numbering = true
Expand Down
16 changes: 16 additions & 0 deletions docx-core/src/documents/elements/table_row.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,14 @@ pub enum TableRowChild {
TableCell(TableCell),
}

impl TableRowChild {
    /// Returns the plain text of the wrapped table cell.
    pub fn to_plain_text(&self) -> String {
        // `TableCell` is the only variant, so the pattern is irrefutable.
        let TableRowChild::TableCell(cell) = self;
        cell.to_plain_text()
    }
}

impl BuildXML for TableRowChild {
fn build(&self) -> Vec<u8> {
match self {
Expand All @@ -38,6 +46,14 @@ impl TableRow {
}
}

/// Returns the plain text of every cell in this row, separated by a single
/// space.
pub fn to_plain_text(&self) -> String {
    let mut out = String::new();
    for (index, cell) in self.cells.iter().enumerate() {
        // Separator goes between cells only, never before the first one.
        if index > 0 {
            out.push(' ');
        }
        out.push_str(&cell.to_plain_text());
    }
    out
}

pub fn grid_after(mut self, grid_after: u32) -> TableRow {
self.property = self.property.grid_after(grid_after);
self
Expand Down
40 changes: 40 additions & 0 deletions docx-core/src/documents/elements/text.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use serde::ser::{Serialize, SerializeStruct, Serializer};
use serde::Deserialize;
use std::collections::HashMap;

use crate::documents::BuildXML;
use crate::escape::escape;
Expand All @@ -20,6 +21,45 @@ impl Text {
}
}

/// Returns the names of all `${NAME}` placeholders in this text, in order of
/// appearance (duplicates included).
///
/// A placeholder starts at the exact two-character sequence `${` and ends at
/// the next `}`. A `$` that is not immediately followed by `{`, or a `{` that
/// is not preceded by `$`, is treated as ordinary text. An unterminated `${`
/// yields nothing. If another `${` appears before the closing brace, the
/// innermost one wins: `${a${b}` yields `b`.
pub fn get_vars(&self) -> Vec<String> {
    const OPEN: &str = "${";

    let mut vars = Vec::new();
    let mut rest = self.text.as_str();
    while let Some(start) = rest.find(OPEN) {
        let after_open = &rest[start + OPEN.len()..];
        let end = match after_open.find('}') {
            Some(end) => end,
            // No closing brace anywhere after this opener: nothing more to find.
            None => break,
        };
        let candidate = &after_open[..end];
        // Restart at the last opener so that only text between the nearest
        // `${` and `}` is reported; this is exactly the span `render` replaces.
        let name = match candidate.rfind(OPEN) {
            Some(inner) => &candidate[inner + OPEN.len()..],
            None => candidate,
        };
        vars.push(name.to_string());
        rest = &after_open[end + 1..];
    }
    vars
}

/// Replaces every `${NAME}` placeholder reported by `get_vars` with the
/// value stored under `NAME` in `dictionary`.
///
/// Placeholders whose name is missing from `dictionary` are replaced with an
/// empty string. The placeholder list is computed once, from the text as it
/// is before any substitution.
pub fn render(&mut self, dictionary: &HashMap<String, String>) {
    for var in self.get_vars() {
        let replacement = dictionary.get(&var).map(String::as_str).unwrap_or("");
        self.text = self.text.replace(&format!("${{{}}}", var), replacement);
    }
}

pub(crate) fn without_escape(text: impl Into<String>) -> Text {
Text {
text: text.into(),
Expand Down
16 changes: 16 additions & 0 deletions docx-core/src/documents/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@ pub use xml_docx::*;

use serde::{ser, Serialize};

use crate::{ReaderError, read_docx};

#[derive(Debug, Clone)]
pub struct Image(pub Vec<u8>);

Expand Down Expand Up @@ -180,6 +182,20 @@ impl Docx {
Default::default()
}

/// Reads the file at `path` into memory and parses it with `read_docx`.
///
/// # Errors
///
/// Returns a `ReaderError` if the file cannot be read or if `read_docx`
/// fails on its contents.
pub fn read_file<P: AsRef<std::path::Path>>(path: P) -> Result<Docx, ReaderError> {
    let bytes = std::fs::read(path)?;
    read_docx(&bytes)
}

/// Returns the plain text of the main document part.
pub fn to_plain_text(&self) -> String {
    let body = &self.document;
    body.to_plain_text()
}

/// Renders the main document part in place against `dictionary`.
pub fn render(&mut self, dictionary: &HashMap<String, String>) {
    let body = &mut self.document;
    body.render(dictionary);
}

pub fn document(mut self, d: Document) -> Docx {
for child in &self.document.children {
match child {
Expand Down
4 changes: 4 additions & 0 deletions docx-core/src/reader/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@ use thiserror::Error;

#[derive(Error, Debug)]
pub enum ReaderError {
/// IO errors
#[error("Failed to read file.")]
FileReadError(#[from] std::io::Error),
/// Zip errors
#[error("Failed to read from zip.")]
ZipError(#[from] zip::result::ZipError),
#[error("Failed to parse int.")]
Expand Down
Loading