Skip to content

Lucene indexing fails if expression attribute contains self::* or function is called with self::* or . (dot) #6446

@daliboris

Description

@daliboris

Describe the bug
When the @expression attribute of a Lucene field is equal to self::*, or contains self::* anywhere in its XPath expression, indexing fails silently: no error is written to the log.

When the @expression attribute of a Lucene field calls a function from the imported module (e.g. nav:get-metadata(self::*, 'module-dot')), the function's first parameter is declared as element(), and an XPath expression containing self::* is passed as that argument, the following error is thrown: ERROR XPTY0004: The actual cardinality for parameter 1 does not match the cardinality declared in the function's signature: idx:get-metadata($root as element(), $field as xs:string) as item()*. Expected cardinality: exactly one, got 0.

When a dot (.) is passed as the first argument, i.e. nav:get-metadata(., 'module-dot'), the following error is thrown: err:XPTY0004 element()(entry-orth) is not a sub-type of element() [at line 4, column 18, source: String/6014374732715299956] (the dot appears to be interpreted as a string rather than as an element node).

Expected behavior
XPath expressions containing self::* are well-formed, so they should be evaluated and their content indexed, just as happens when an element name or a dot is used in the XPath expression, i.e. self::tei:name or a single dot (.).

The same behavior is expected when a function is called and the XPath expression passed as its argument refers to the current node via . (dot).

To Reproduce

xquery version "3.1";
module namespace t="http://exist-db.org/xquery/test";
declare namespace test="http://exist-db.org/xquery/xqsuite";
declare namespace tei="http://www.tei-c.org/ns/1.0";
declare namespace exist = "http://exist.sourceforge.net/NS/exist";

(: Test fixture: a minimal TEI document with a single entry whose form/orth text is "entry-orth". :)
declare variable $t:XML := document { 
 <entry xmlns="http://www.tei-c.org/ns/1.0"><form><orth>entry-orth</orth></form></entry>
};


(:
    Collection configuration under test. It declares a Lucene index on tei:entry,
    imports the module http://teipublisher.com/index (from index.xql) under the
    prefix "nav", and defines one field per XPath expression variant. The
    "works" / "fails" remarks inside the configuration record the behavior
    reported in this issue.
:)
declare variable $t:xconf :=
<collection xmlns="http://exist-db.org/collection-config/1.0">
    <index xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:tei="http://www.tei-c.org/ns/1.0">
        <lucene>
            <module uri="http://teipublisher.com/index" prefix="nav" at="index.xql" />
            <text qname="tei:entry">
                <!-- expressions -->
                <field name="expression-dot" expression="." /> <!-- works -->
                <field name="expression-self-name" expression="self::tei:entry" /> <!-- works -->
                <field name="expression-dot-self-asterisk" expression="./self::*" /> <!-- fails -->
                <field name="expression-dot-self-asterisk-child" expression="./self::*/tei:form" /> <!-- fails -->
                <field name="expression-self-asterisk" expression="self::*" /> <!-- fails -->
                <field name="expression-asterisk" expression="*" /> <!-- works -->
                <field name="expression-dot-asterisk" expression="./*" /> <!-- works -->
                <!-- module function calls -->
                <field name="module-dot" expression="nav:get-metadata(., 'module-dot')" /> <!-- fails -->
                <field name="module-self-name" expression="nav:get-metadata(self::tei:entry, 'module-self-name')" /> <!-- fails -->
                <field name="module-self-asterisk" expression="nav:get-metadata(self::*, 'module-self-asterisk')" /> <!-- fails -->
                <field name="module-dot-self-asterisk" expression="nav:get-metadata(./self::*, 'module-dot-self-asterisk')" /> <!-- fails -->
                <field name="module-dot-self-asterisk-child" expression="nav:get-metadata(./self::*/tei:form, 'module-dot-self-asterisk-child')" /> <!-- fails -->
                <field name="module-asterisk" expression="nav:get-metadata(*, 'module-asterisk')" /> <!-- fails -->
                <field name="module-dot-asterisk" expression="nav:get-metadata(./*, 'module-dot-asterisk')" /> <!-- fails -->
            </text>
        </lucene>
    </index>
</collection>;


(:
    Source text of the index helper module, stored as index.xql by t:setup().
    Its single function takes an element() and a field name and returns the
    field name, a colon, and the whitespace-normalized string value of the element.
    The module binds its namespace to the prefix "idx"; the collection
    configuration imports the same namespace URI under the prefix "nav".
:)
declare variable $t:module := 'xquery version "3.1";

module namespace idx="http://teipublisher.com/index";
declare function idx:get-metadata($root as element(), $field as xs:string) {
    $field || ":" || normalize-space($root)
};';

(: Collection that receives the test document and index.xql; created as "test" under /db when this variable is evaluated. :)
declare variable $t:testCol := xmldb:create-collection("/db", "test");
(: Collection that receives collection.xconf; created as "test" under /db/system/config/db when this variable is evaluated. :)
declare variable $t:indexCol := xmldb:create-collection("/db/system/config/db", "test");


(:
    Test set-up: stores the helper module, the test document and the index
    configuration, then reindexes /db/test. Returns the results of the three
    store calls followed by the result of the reindex call.
:)
declare
    %test:setUp
function t:setup() {
    (: helper module referenced by the Lucene configuration :)
    xmldb:store($t:testCol, "index.xql", $t:module),
    (: document to be indexed :)
    xmldb:store($t:testCol, "test.xml", $t:XML),
    (: index configuration for the test collection :)
    xmldb:store($t:indexCol, "collection.xconf", $t:xconf),
    (: the configuration is stored after the document, so reindex explicitly :)
    xmldb:reindex("/db/test")
};

(:
    Test tear-down: removes the test collection and then its configuration
    collection, skipping any collection that is not available.
:)
declare
    %test:tearDown
function t:tearDown() {
    for $collection in ($t:testCol, $t:indexCol)
    where xmldb:collection-available($collection)
    return
        xmldb:remove($collection)
};

(: expressions tests :)

(: Field "expression-dot" (expression "."): expects exactly one indexed field value. :)
declare
    %test:args("expression-dot")
    %test:assertEquals(1)
function t:expression-dot($field-name as xs:string) {
    $field-name => t:get-field-index()
};

(: Field "expression-self-name" (expression "self::tei:entry"): expects exactly one indexed field value. :)
declare
    %test:args("expression-self-name")
    %test:assertEquals(1)
function t:expression-self-name($field-name as xs:string) {
    $field-name => t:get-field-index()
};

(: Field "expression-self-asterisk" (expression "self::*"): expects exactly one indexed field value. :)
declare
    %test:args("expression-self-asterisk")
    %test:assertEquals(1)
function t:expression-self-asterisk($field-name as xs:string) {
    $field-name => t:get-field-index()
};

(: Field "expression-dot-self-asterisk" (expression "./self::*"): expects exactly one indexed field value. :)
declare
    %test:args("expression-dot-self-asterisk")
    %test:assertEquals(1)
function t:expression-dot-self-asterisk($field-name as xs:string) {
    $field-name => t:get-field-index()
};

(: Field "expression-dot-self-asterisk-child" (expression "./self::*/tei:form"): expects exactly one indexed field value. :)
declare
    %test:args("expression-dot-self-asterisk-child")
    %test:assertEquals(1)
function t:expression-dot-self-asterisk-child($field-name as xs:string) {
    $field-name => t:get-field-index()
};

(: Field "expression-asterisk" (expression "*"): expects exactly one indexed field value. :)
declare
    %test:args("expression-asterisk")
    %test:assertEquals(1)
function t:expression-asterisk($field-name as xs:string) {
    $field-name => t:get-field-index()
};

(: Field "expression-dot-asterisk" (expression "./*"): expects exactly one indexed field value. :)
declare
    %test:args("expression-dot-asterisk")
    %test:assertEquals(1)
function t:expression-dot-asterisk($field-name as xs:string) {
    $field-name => t:get-field-index()
};

(: module function calls tests :)

(: Field "module-dot" (nav:get-metadata called with "."): expects exactly one indexed field value. :)
declare
    %test:args("module-dot")
    %test:assertEquals(1)
function t:module-dot($field-name as xs:string) {
    $field-name => t:get-field-index()
};

(: Field "module-self-name" (nav:get-metadata called with "self::tei:entry"): expects exactly one indexed field value. :)
declare
    %test:args("module-self-name")
    %test:assertEquals(1)
function t:module-self-name($field-name as xs:string) {
    $field-name => t:get-field-index()
};


(: Field "module-self-asterisk" (nav:get-metadata called with "self::*"): expects exactly one indexed field value. :)
declare
    %test:args("module-self-asterisk")
    %test:assertEquals(1)
function t:module-self-asterisk($field-name as xs:string) {
    $field-name => t:get-field-index()
};

(: Field "module-dot-self-asterisk" (nav:get-metadata called with "./self::*"): expects exactly one indexed field value. :)
declare
    %test:args("module-dot-self-asterisk")
    %test:assertEquals(1)
function t:module-dot-self-asterisk($field-name as xs:string) {
    $field-name => t:get-field-index()
};

(: Field "module-dot-self-asterisk-child" (nav:get-metadata called with "./self::*/tei:form"): expects exactly one indexed field value. :)
declare
    %test:args("module-dot-self-asterisk-child")
    %test:assertEquals(1)
function t:module-dot-self-asterisk-child($field-name as xs:string) {
    $field-name => t:get-field-index()
};

(: Field "module-asterisk" (nav:get-metadata called with "*"): expects exactly one indexed field value. :)
declare
    %test:args("module-asterisk")
    %test:assertEquals(1)
function t:module-asterisk($field-name as xs:string) {
    $field-name => t:get-field-index()
};

(: Field "module-dot-asterisk" (nav:get-metadata called with "./*"): expects exactly one indexed field value. :)
declare
    %test:args("module-dot-asterisk")
    %test:assertEquals(1)
function t:module-dot-asterisk($field-name as xs:string) {
    $field-name => t:get-field-index()
};


(:
    Counts the values stored in the Lucene field $field-name for the tei:entry
    elements of the test collection.

    The query is assembled as a string and run through util:eval. It selects
    every tei:entry that matches the wildcard query "<field>:(*)" and maps each
    hit to ft:field(., "<field>"). For the field "expression-dot", and assuming
    $t:testCol resolves to /db/test, the evaluated query reads:

        declare namespace tei="http://www.tei-c.org/ns/1.0";
        collection("/db/test")//tei:entry[ft:query(.,
            'expression-dot:(*)',
            map { 'leading-wildcard': 'yes', 'filter-rewrite': 'yes' })
            ]
            ! ft:field(., 'expression-dot')

    Returns the number of items produced by that query.
:)
declare
function t:get-field-index($field-name as xs:string) {
    let $prolog := 'declare namespace tei="http://www.tei-c.org/ns/1.0";' || "&#xa;"
    let $entries := 'collection("' || $t:testCol || '")//tei:entry'
    let $options := "map { 'leading-wildcard': 'yes', 'filter-rewrite': 'yes' }"
    let $predicate := "[ft:query(., '" || $field-name || ":(*)', " || $options || ")] "
    let $projection := "ft:field(., '" || $field-name || "')"
    let $xquery := string-join(($prolog, $entries, $predicate, " ! ", $projection), "")
    return
        util:eval($xquery) => count()
};

Context (please always complete the following information)

  • Build: eXist-7.0.0-SNAPSHOT (dce395f)
  • Java: 21.0.11 (Eclipse Adoptium)
  • OS: Linux 5.15.167.4-microsoft-standard-WSL2 (amd64)

Metadata

Metadata

Assignees

Labels

Lucene (issue is related to Lucene or its integration) · bug (issue confirmed as bug)

Type

No fields configured for Bug.

Projects

No projects

Relationships

None yet

Development

No branches or pull requests

Issue actions