From 8788d34bf56672ac0adb0dc7cc54034b2a95e5a5 Mon Sep 17 00:00:00 2001
From: Kenta Sato
Date: Mon, 28 Nov 2016 13:05:58 +0900
Subject: [PATCH] WIP: make find faster

---
 src/xpath.jl | 109 +++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 101 insertions(+), 8 deletions(-)

diff --git a/src/xpath.jl b/src/xpath.jl
index d1b0013..68e4aa8 100644
--- a/src/xpath.jl
+++ b/src/xpath.jl
@@ -49,8 +49,7 @@ end
 Find the first node matching `xpath` XPath query from `doc`.
 """
 function Base.findfirst(doc::Document, xpath::AbstractString)
-    # string("(", xpath, ")[position()=1]") may be faster
-    return first(find(doc, xpath))
+    return findfirst(doc.node, xpath)
 end
 
 """
@@ -59,8 +58,7 @@ end
 Find the last node matching `xpath` XPath query from `doc`.
 """
 function Base.findlast(doc::Document, xpath::AbstractString)
-    # string("(", xpath, ")[position()=last()]") may be faster
-    return last(find(doc, xpath))
+    return findlast(doc.node, xpath)
 end
 
 """
@@ -71,6 +69,19 @@ Find nodes matching `xpath` XPath query starting from `node`.
 The `ns` argument is an iterator of namespace prefix and URI pairs.
 """
 function Base.find(node::Node, xpath::AbstractString, ns=namespaces(node))::Vector{Node}
+    # Fast path: "*" needs no XPath evaluation; return `elements(node)`.
+    is_asterisk(xpath) && return elements(node)
+    # Fast path: a name without a namespace prefix is matched by walking
+    # the elements and comparing their names directly.
+    if is_ncname(xpath)
+        matched = Node[]
+        cursor = first_element_ptr(node.ptr)
+        while cursor != C_NULL
+            is_named_as(cursor, xpath) && push!(matched, Node(cursor))
+            cursor = next_element_ptr(cursor)
+        end
+        return matched
+    end
     context_ptr = new_xpath_context(document(node))
     if context_ptr == C_NULL
         throw_xml_error()
@@ -104,8 +115,30 @@ end
 Find the first node matching `xpath` XPath query starting from `node`.
 """
 function Base.findfirst(node::Node, xpath::AbstractString, ns=namespaces(node))
-    # string("(", xpath, ")[position()=1]") may be faster
-    return first(find(node, xpath, ns))
+    # Fast path: "*" means any element, so take the first one directly.
+    if is_asterisk(xpath)
+        first_ptr = first_element_ptr(node.ptr)
+        first_ptr == C_NULL && throw_no_matching_error()
+        return Node(first_ptr)
+    end
+
+    # Fast path: a name without a namespace prefix is matched by walking
+    # the elements forwards and returning on the first equal name.
+    if is_ncname(xpath)
+        cursor = first_element_ptr(node.ptr)
+        while cursor != C_NULL
+            is_named_as(cursor, xpath) && return Node(cursor)
+            cursor = next_element_ptr(cursor)
+        end
+        throw_no_matching_error()
+    end
+
+    # TODO: add a fast path for names with a namespace prefix (is_qname).
+
+    # General case: run the full query and pick the first match.
+    matches = find(node, xpath, ns)
+    isempty(matches) && throw_no_matching_error()
+    return first(matches)
 end
 
 """
@@ -114,8 +147,34 @@ end
 Find the last node matching `xpath` XPath query starting from `node`.
 """
 function Base.findlast(node::Node, xpath::AbstractString, ns=namespaces(node))
-    # string("(", xpath, ")[position()=last()]") may be faster
-    return last(find(node, xpath, ns))
+    # Fast path: "*" means any element, so take the last one directly.
+    if is_asterisk(xpath)
+        last_ptr = last_element_ptr(node.ptr)
+        last_ptr == C_NULL && throw_no_matching_error()
+        return Node(last_ptr)
+    end
+
+    # Fast path: a name without a namespace prefix is matched by walking
+    # the elements backwards and returning on the first equal name.
+    if is_ncname(xpath)
+        cursor = last_element_ptr(node.ptr)
+        while cursor != C_NULL
+            is_named_as(cursor, xpath) && return Node(cursor)
+            cursor = prev_element_ptr(cursor)
+        end
+        throw_no_matching_error()
+    end
+
+    # TODO: add a fast path for names with a namespace prefix (is_qname).
+    # General case: run the full query and pick the last match.
+    matches = find(node, xpath, ns)
+    isempty(matches) && throw_no_matching_error()
+    return last(matches)
+end
+
+# Raise the error used by findfirst/findlast when no node matches.
+function throw_no_matching_error()
+    throw(ArgumentError("no matching nodes"))
 end
 
 function new_xpath_context(doc)
@@ -162,3 +221,37 @@ function free(ptr::Ptr{_XPathObject})
         (Ptr{Void},),
         ptr)
 end
+
+# Check if `node` is named as `name` (same as `name(node) == name` but faster).
+function is_named_as(node_ptr, name)
+    # Load the node struct to reach its raw name pointer.
+    name_ptr = unsafe_load(node_ptr).name
+    # A node whose name pointer is NULL never matches.
+    name_ptr == C_NULL && return false
+    # The names are considered equal exactly when xmlStrEqual reports 1.
+    return ccall(
+        (:xmlStrEqual, libxml2),
+        Cint,
+        (Cstring, Cstring),
+        name_ptr, name) == 1
+end
+
+# Check if `name` is exactly the string "*"
+# (the query that the find functions treat as "any element").
+is_asterisk(name) = name == "*"
+
+# Check if `name` is accepted by libxml2's xmlValidateNCName
+# (a return value of 0 is treated as valid).
+function is_ncname(name)
+    status = ccall(
+        (:xmlValidateNCName, libxml2), Cint, (Cstring, Cint), name, 0)
+    return status == 0
+end
+
+# Check if `name` is accepted by libxml2's xmlValidateQName
+# (a return value of 0 is treated as valid).
+function is_qname(name)
+    status = ccall(
+        (:xmlValidateQName, libxml2), Cint, (Cstring, Cint), name, 0)
+    return status == 0
+end