Skip to content

Commit d7b3e53

Browse files
saksdginev
authored and committed
add c14n for node and document
1 parent 13fbf1c commit d7b3e53

File tree

19 files changed

+657
-0
lines changed

19 files changed

+657
-0
lines changed

src/tree/c14n.rs

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
//! Shared canonicalization logic and types.
2+
//!
3+
use std::ffi::c_int;
4+
5+
use crate::bindings::{
6+
xmlC14NMode_XML_C14N_1_0, xmlC14NMode_XML_C14N_1_1, xmlC14NMode_XML_C14N_EXCLUSIVE_1_0,
7+
};
8+
9+
/// Options for configuring how to canonicalize XML
10+
#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug, Default)]
11+
pub struct CanonicalizationOptions {
12+
/// Canonicalization specification to use
13+
pub mode: CanonicalizationMode,
14+
/// If true, keep `<!-- ... -->` comments, otherwise remove
15+
pub with_comments: bool,
16+
/// Namespaces to keep even if they are unused. By default, in [CanonicalizationMode::ExclusiveCanonical1_0], unused namespaces are removed.
17+
///
18+
/// Doesn't apply to other canonicalization modes.
19+
pub inclusive_ns_prefixes: Vec<String>,
20+
}
21+
22+
/// The canonicalization specification to apply.
///
/// [CanonicalizationMode::ExclusiveCanonical1_0] is the default.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum CanonicalizationMode {
    /// The original C14N 1.0 specification
    Canonical1_0,
    /// The Exclusive C14N 1.0 specification
    #[default]
    ExclusiveCanonical1_0,
    /// The C14N 1.1 specification
    Canonical1_1,
}
33+
34+
impl From<CanonicalizationMode> for c_int {
35+
fn from(mode: CanonicalizationMode) -> Self {
36+
let c14n_mode = match mode {
37+
CanonicalizationMode::Canonical1_0 => xmlC14NMode_XML_C14N_1_0,
38+
CanonicalizationMode::ExclusiveCanonical1_0 => xmlC14NMode_XML_C14N_EXCLUSIVE_1_0,
39+
CanonicalizationMode::Canonical1_1 => xmlC14NMode_XML_C14N_1_1,
40+
};
41+
42+
c_int::from(c14n_mode as i32)
43+
}
44+
}

src/tree/document.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -346,3 +346,5 @@ impl Document {
346346
Ok(())
347347
}
348348
}
349+
350+
mod c14n;

src/tree/document/c14n.rs

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
//! Document canonicalization logic
2+
//!
3+
use std::ffi::{c_int, c_void, CString};
4+
use std::os::raw;
5+
use std::ptr::null_mut;
6+
7+
use crate::tree::c14n::*;
8+
9+
use super::{
10+
xmlAllocOutputBuffer, xmlC14NExecute, xmlC14NIsVisibleCallback, xmlChar, xmlNodePtr,
11+
xmlOutputBufferClose, xmlOutputBufferPtr, Document,
12+
};
13+
14+
impl Document {
15+
/// Canonicalize a document and return the results.
16+
pub fn canonicalize(
17+
&self,
18+
options: CanonicalizationOptions,
19+
callback: Option<(xmlNodePtr, xmlC14NIsVisibleCallback)>,
20+
) -> Result<String, ()> {
21+
let document = (*self.0).borrow().doc_ptr;
22+
23+
let mut ns_list_c = to_xml_string_vec(options.inclusive_ns_prefixes);
24+
let inclusive_ns_prefixes = ns_list_c.as_mut_ptr();
25+
let with_comments = c_int::from(options.with_comments);
26+
27+
let (is_visible_callback, user_data) = if let Some((node_ptr, visibility_callback)) = callback {
28+
(visibility_callback, node_ptr as *mut _)
29+
} else {
30+
(None, null_mut())
31+
};
32+
33+
let mode = options.mode.into();
34+
unsafe {
35+
let c_obuf = create_output_buffer();
36+
37+
let status = xmlC14NExecute(
38+
document,
39+
is_visible_callback,
40+
user_data,
41+
mode,
42+
inclusive_ns_prefixes,
43+
with_comments,
44+
c_obuf,
45+
);
46+
47+
let res = c_obuf_into_output(c_obuf);
48+
49+
if status < 0 {
50+
Err(())
51+
} else {
52+
Ok(res)
53+
}
54+
}
55+
}
56+
}
57+
58+
unsafe fn c_obuf_into_output(c_obuf: xmlOutputBufferPtr) -> String {
59+
let ctx_ptr = (*c_obuf).context;
60+
let output = Box::from_raw(ctx_ptr as *mut String);
61+
62+
(*c_obuf).context = std::ptr::null_mut::<c_void>();
63+
64+
xmlOutputBufferClose(c_obuf);
65+
66+
*output
67+
}
68+
69+
unsafe fn create_output_buffer() -> xmlOutputBufferPtr {
70+
let output = String::new();
71+
let ctx_ptr = Box::into_raw(Box::new(output));
72+
let encoder = std::ptr::null_mut();
73+
74+
let buf = xmlAllocOutputBuffer(encoder);
75+
76+
(*buf).writecallback = Some(xml_write_io);
77+
(*buf).closecallback = Some(xml_close_io);
78+
(*buf).context = ctx_ptr as _;
79+
80+
buf
81+
}
82+
83+
/// Close callback installed on the output buffer.
///
/// It ignores the context pointer entirely and always reports `0`.
unsafe extern "C" fn xml_close_io(_context: *mut raw::c_void) -> raw::c_int {
    const CLOSE_STATUS: raw::c_int = 0;
    CLOSE_STATUS
}
86+
87+
/// Write callback installed on the output buffer: appends `len` bytes from `buffer` to the
/// `String` that `io_ptr` points to.
///
/// Returns the number of bytes consumed (`len`), `0` when there is no context or nothing to
/// write, and `-1` when `buffer` is null or `len` is negative.
///
/// Bytes that are not valid UTF-8 are replaced with U+FFFD.
/// NOTE(review): each call is decoded on its own, so a multi-byte UTF-8 sequence split across
/// two calls would be replaced as well — confirm whether libxml2 can split writes that way.
///
/// # Safety
/// `io_ptr` must be null or point to a live `String` that is not accessed elsewhere during the
/// call. When `len > 0`, `buffer` must be null or point to at least `len` readable bytes.
unsafe extern "C" fn xml_write_io(
    io_ptr: *mut raw::c_void,
    buffer: *const raw::c_char,
    len: raw::c_int,
) -> raw::c_int {
    if io_ptr.is_null() || len == 0 {
        return 0;
    }

    // A negative length would turn into a huge `usize`; a null buffer cannot be sliced.
    if buffer.is_null() || len < 0 {
        return -1;
    }

    // SAFETY: `buffer` is non-null and, per the contract above, valid for `len` bytes. The data
    // is only read, so a shared slice is used rather than a mutable one.
    let bytes = std::slice::from_raw_parts(buffer as *const u8, len as usize);
    let text = String::from_utf8_lossy(bytes);

    // SAFETY: `io_ptr` is non-null and points to the `String` owned by the output buffer.
    let output = &mut *(io_ptr as *mut String);
    output.push_str(&text);

    len
}
103+
104+
/// Create a [Vec] of null-terminated [*mut xmlChar] strings
105+
fn to_xml_string_vec(vec: Vec<String>) -> Vec<*mut xmlChar> {
106+
vec
107+
.into_iter()
108+
.map(|s| CString::new(s).unwrap().into_raw() as *mut xmlChar)
109+
.chain(std::iter::once(std::ptr::null_mut()))
110+
.collect()
111+
}

src/tree/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
//! The tree functionality
22
//!
33
4+
pub mod c14n;
45
pub mod document;
56
pub mod namespace;
67
pub mod node;

src/tree/node.rs

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1039,6 +1039,34 @@ impl Node {
10391039
context.findnodes(xpath, Some(self))
10401040
}
10411041

1042+
/// Search this node for XPath `path`, and return only the first match.
1043+
pub fn at_xpath(&self, path: &str, ns_binlings: &[(&str, &str)]) -> Result<Option<Node>, ()> {
1044+
let mut context = Context::from_node(self)?;
1045+
for (prefix, href) in ns_binlings {
1046+
context.register_namespace(prefix, href)?;
1047+
}
1048+
let nodes = context.findnodes(path, Some(self))?;
1049+
1050+
Ok(nodes.first().cloned())
1051+
}
1052+
1053+
/// Get a list of ancestor Node for this Node.
1054+
pub fn ancestors(&self) -> Vec<Node> {
1055+
let node_ptr = self.node_ptr();
1056+
1057+
let mut res = Vec::new();
1058+
1059+
let ancestor_ptrs = node_ancestors(node_ptr);
1060+
1061+
for ptr in ancestor_ptrs {
1062+
if let Some(node) = self.ptr_as_option(ptr) {
1063+
res.push(node)
1064+
}
1065+
}
1066+
1067+
res
1068+
}
1069+
10421070
/// find String values via xpath, at a specified node or the document root
10431071
pub fn findvalues(&self, xpath: &str) -> Result<Vec<String>, ()> {
10441072
let mut context = Context::from_node(self)?;
@@ -1085,3 +1113,26 @@ impl Node {
10851113
}
10861114
}
10871115
}
1116+
1117+
/// Walk up the parent chain of `node_ptr` and return every ancestor pointer, nearest parent
/// first. A null `node_ptr`, or a node without a parent, gives an empty list.
fn node_ancestors(node_ptr: xmlNodePtr) -> Vec<xmlNodePtr> {
    let mut chain = Vec::new();

    if node_ptr.is_null() {
        return chain;
    }

    let mut current = xmlGetParent(node_ptr);
    while !current.is_null() {
        chain.push(current);
        current = xmlGetParent(current);
    }

    chain
}
1137+
1138+
mod c14n;

src/tree/node/c14n.rs

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
//! Node canonicalization logic
2+
//!
3+
use std::ffi::c_void;
4+
5+
use crate::{
6+
bindings::{xmlC14NIsVisibleCallback, xmlNodePtr},
7+
c_helpers::xmlGetNodeType,
8+
tree::{c14n::*, Node},
9+
};
10+
11+
use super::node_ancestors;
12+
13+
impl Node {
14+
/// Canonicalize a document and return the results.
15+
pub fn canonicalize(&mut self, options: CanonicalizationOptions) -> Result<String, ()> {
16+
let doc_ref = self.get_docref().upgrade().unwrap();
17+
let document = crate::tree::Document(doc_ref);
18+
19+
let user_data = self.node_ptr_mut().unwrap();
20+
let callback: xmlC14NIsVisibleCallback = Some(callback_wrapper);
21+
22+
document.canonicalize(options, Some((user_data, callback)))
23+
}
24+
}
25+
26+
unsafe extern "C" fn callback_wrapper(
27+
c14n_root_ptr: *mut c_void,
28+
node_ptr: xmlNodePtr,
29+
parent_ptr: xmlNodePtr,
30+
) -> ::std::os::raw::c_int {
31+
let c14n_root_ptr = c14n_root_ptr as xmlNodePtr;
32+
let node_type = xmlGetNodeType(node_ptr);
33+
34+
let tn_ptr = if NODE_TYPES.contains(&node_type) {
35+
node_ptr
36+
} else {
37+
parent_ptr
38+
};
39+
40+
let tn_ancestors = node_ancestors(tn_ptr);
41+
42+
let ret = (tn_ptr == c14n_root_ptr) || tn_ancestors.contains(&c14n_root_ptr);
43+
if ret {
44+
1
45+
} else {
46+
0
47+
}
48+
}
49+
50+
const NODE_TYPES: [u32; 7] = [
51+
super::xmlElementType_XML_ELEMENT_NODE,
52+
super::xmlElementType_XML_ATTRIBUTE_NODE,
53+
super::xmlElementType_XML_DOCUMENT_TYPE_NODE,
54+
super::xmlElementType_XML_TEXT_NODE,
55+
super::xmlElementType_XML_DTD_NODE,
56+
super::xmlElementType_XML_PI_NODE,
57+
super::xmlElementType_XML_COMMENT_NODE,
58+
];

0 commit comments

Comments
 (0)