Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@
import org.exist.collections.CollectionConfigurationManager;
import org.exist.dom.persistent.DocumentImpl;
import org.exist.dom.persistent.NodeProxy;
import org.exist.numbering.NodeId;
import org.exist.security.PermissionDeniedException;
import org.exist.storage.DBBroker;
import org.exist.xmldb.XmldbURI;
Expand Down Expand Up @@ -100,9 +99,9 @@ public Analyzer getAnalyzer() {

protected abstract void processText(CharSequence text, Document luceneDoc);

protected abstract void build(DBBroker broker, DocumentImpl document, NodeId nodeId, Document luceneDoc, CharSequence text);
protected abstract void build(DBBroker broker, NodeProxy contextNode, Document luceneDoc, CharSequence text);

protected void doBuild(DBBroker broker, DocumentImpl document, NodeId nodeId, Document luceneDoc, CharSequence text)
protected void doBuild(DBBroker broker, NodeProxy contextNode, Document luceneDoc, CharSequence text)
throws PermissionDeniedException, XPathException {
if (expression.isEmpty()) {
processText(text, luceneDoc);
Expand All @@ -116,19 +115,16 @@ protected void doBuild(DBBroker broker, DocumentImpl document, NodeId nodeId, Do
}

final XQuery xquery = broker.getBrokerPool().getXQueryService();
final NodeProxy currentNode = new NodeProxy(null, document, nodeId);
try {
Sequence result = xquery.execute(broker, compiled, currentNode);
Sequence result = xquery.execute(broker, compiled, contextNode);

if (!result.isEmpty()) {
processResult(result, luceneDoc);
}
} catch (PermissionDeniedException | XPathException e) {
isValid = false;
throw e;
} finally {
compiled.reset();
compiled.getContext().reset();
try { compiled.reset(); } finally { compiled.getContext().reset(); }
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,7 @@

import org.apache.lucene.document.Document;
import org.apache.lucene.facet.FacetField;
import org.exist.dom.persistent.DocumentImpl;
import org.exist.numbering.NodeId;
import org.exist.dom.persistent.NodeProxy;
import org.exist.security.PermissionDeniedException;
import org.exist.storage.DBBroker;
import org.exist.util.Configuration;
Expand Down Expand Up @@ -117,9 +116,9 @@ protected void processText(CharSequence text, Document luceneDoc) {
}
}

public void build(DBBroker broker, DocumentImpl document, NodeId nodeId, Document luceneDoc, CharSequence text) {
public void build(DBBroker broker, NodeProxy contextNode, Document luceneDoc, CharSequence text) {
try {
doBuild(broker, document, nodeId, luceneDoc, text);
doBuild(broker, contextNode, luceneDoc, text);
} catch (PermissionDeniedException e) {
LOG.warn("Permission denied while evaluating expression for facet '{}': {}", dimension, expression, e);
} catch (XPathException e) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,7 @@
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.*;
import org.apache.lucene.util.BytesRef;
import org.exist.dom.persistent.DocumentImpl;
import org.exist.dom.persistent.NodeProxy;
import org.exist.numbering.NodeId;
import org.exist.security.PermissionDeniedException;
import org.exist.storage.DBBroker;
import org.exist.util.Configuration;
Expand Down Expand Up @@ -123,10 +121,10 @@ public Analyzer getAnalyzer() {
}

@Override
protected void build(DBBroker broker, DocumentImpl document, NodeId nodeId, Document luceneDoc, CharSequence text) {
protected void build(DBBroker broker, NodeProxy contextNode, Document luceneDoc, CharSequence text) {
try {
if (checkCondition(broker, document, nodeId)) {
doBuild(broker, document, nodeId, luceneDoc, text);
if (checkCondition(broker, contextNode)) {
doBuild(broker, contextNode, luceneDoc, text);
}
} catch (XPathException e) {
LOG.warn("XPath error while evaluating expression for field named '{}': {}: {}", fieldName, expression, e.getMessage(), e);
Expand All @@ -135,7 +133,7 @@ protected void build(DBBroker broker, DocumentImpl document, NodeId nodeId, Docu
}
}

private boolean checkCondition(DBBroker broker, DocumentImpl document, NodeId nodeId) throws PermissionDeniedException, XPathException {
private boolean checkCondition(DBBroker broker, NodeProxy contextNode) throws PermissionDeniedException, XPathException {
if (condition.isEmpty()) {
return true;
}
Expand All @@ -148,16 +146,13 @@ private boolean checkCondition(DBBroker broker, DocumentImpl document, NodeId no
}

final XQuery xquery = broker.getBrokerPool().getXQueryService();
final NodeProxy currentNode = new NodeProxy(null, document, nodeId);
try {
Sequence result = xquery.execute(broker, compiledCondition, currentNode);
Sequence result = xquery.execute(broker, compiledCondition, contextNode);
return result != null && result.effectiveBooleanValue();
} catch (PermissionDeniedException | XPathException e) {
isValid = false;
throw e;
} finally {
compiledCondition.reset();
compiledCondition.getContext().reset();
try { compiledCondition.reset(); } finally { compiledCondition.getContext().reset(); }
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2070,14 +2070,16 @@ private void write() {
for (final PendingDoc pending : nodesToWrite) {
final Document doc = new Document();


final short nodeType = pending.qname.getNameType() == ElementValue.ATTRIBUTE
? Node.ATTRIBUTE_NODE : Node.ELEMENT_NODE;
final NodeProxy contextNode = new NodeProxy(null, currentDoc, pending.nodeId, nodeType);
List<AbstractFieldConfig> facetConfigs = pending.idxConf.getFacetsAndFields();
final ReindexScope scope = broker.getIndexController().getReindexScope();
facetConfigs.forEach(config -> {
if (scope == ReindexScope.VECTOR && !(config instanceof LuceneVectorFieldConfig)) {
return; // Vector-only: skip fulltext fields and facets
}
config.build(broker, currentDoc, pending.nodeId, doc, pending.text);
config.build(broker, contextNode, doc, pending.text);
});
// register field analyzers so indexing uses the same analyzer as querying
final LuceneConfig luceneConfig = pending.idxConf.getParent();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import org.apache.lucene.document.KnnFloatVectorField;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.exist.dom.persistent.DocumentImpl;
import org.exist.dom.persistent.NodeProxy;
import org.exist.indexing.ReindexScope;
import org.exist.numbering.NodeId;
import org.exist.security.PermissionDeniedException;
Expand Down Expand Up @@ -356,8 +357,10 @@ private static byte[] floatsToBytes(final float[] vec) {
}

@Override
protected void build(final DBBroker broker, final DocumentImpl document, final NodeId nodeId,
protected void build(final DBBroker broker, final NodeProxy contextNode,
final Document luceneDoc, final CharSequence text) {
final DocumentImpl document = contextNode.getOwnerDocument();
final NodeId nodeId = contextNode.getNodeId();
final ReindexScope scope = broker.getIndexController().getReindexScope();
try {
if (scope == ReindexScope.FULLTEXT && VECTOR_STORE_DB.equals(luceneConfig.getVectorStore())) {
Expand All @@ -372,7 +375,7 @@ protected void build(final DBBroker broker, final DocumentImpl document, final N
STORE_CONTEXT.set(new StoreContext(broker, document, nodeId));
}
try {
doBuild(broker, document, nodeId, luceneDoc, text);
doBuild(broker, contextNode, luceneDoc, text);
} finally {
if (embeddingLocal) {
STORE_CONTEXT.remove();
Expand All @@ -381,7 +384,7 @@ protected void build(final DBBroker broker, final DocumentImpl document, final N
} catch (IOException e) {
LOG.debug("vector.dbx read failed, falling back to XML: {}", e.getMessage());
try {
doBuild(broker, document, nodeId, luceneDoc, text);
doBuild(broker, contextNode, luceneDoc, text);
} catch (PermissionDeniedException | XPathException ex) {
LOG.warn("Error evaluating expression for vector field '{}': {}", fieldName, ex.getMessage());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@
@RunWith(XSuite.class)
@XSuite.XSuiteFiles({
"src/test/xquery/lucene/ft-query-field.xqm",
"src/test/xquery/lucene/self-axis-index.xqm"
"src/test/xquery/lucene/self-axis-index.xqm",
"src/test/xquery/lucene/field-expression-context.xqm"
})
public class LuceneIndexingTests {
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
(:
: eXist-db Open Source Native XML Database
: Copyright (C) 2001 The eXist-db Authors
:
: info@exist-db.org
: http://www.exist-db.org
:
: This library is free software; you can redistribute it and/or
: modify it under the terms of the GNU Lesser General Public
: License as published by the Free Software Foundation; either
: version 2.1 of the License, or (at your option) any later version.
:
: This library is distributed in the hope that it will be useful,
: but WITHOUT ANY WARRANTY; without even the implied warranty of
: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
: Lesser General Public License for more details.
:
: You should have received a copy of the GNU Lesser General Public
: License along with this library; if not, write to the Free Software
: Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
:)
xquery version "3.1";

(:~
: Regression for #6446: field @expression using self::* or . silently produced no
: indexed content, or threw XPTY0004 when the expression was passed to a module
: function with an element()-typed parameter. Root cause: the context NodeProxy was
: constructed without a node type (UNKNOWN_NODE_TYPE = -1), so NameTest.isOfType(-1)
: always returned false for wildcard element tests, and Type.NODE was not accepted as
: a subtype of element() in function-call argument checking.
:)
module namespace t = "http://exist-db.org/xquery/lucene/field-expression-context";

declare namespace test = "http://exist-db.org/xquery/xqsuite";
import module namespace ft = "http://exist-db.org/xquery/lucene";

declare variable $t:COLL := "/db/test-field-expr-ctx";
declare variable $t:CONF_COLL := "/db/system/config/db/" || substring-after($t:COLL, "/db/");

(:~ Strict element() parameter exposes the XPTY0004 failure path from #6446. :)
declare variable $t:MODULE := ``[xquery version "3.1";
module namespace idx = "http://exist-db.org/xquery/lucene/field-expr-ctx-lib";
declare function idx:get-metadata($root as element(), $field as xs:string) as xs:string {
normalize-space($root)
};
]``;

(:~
: vec attribute carries a dim=4 text-encoded float vector on the entry element.
: Using an attribute rather than a child element means XPath axes * and ./* still
: return only <form>, so module-call fields (mchildren, mdotchildren) are unaffected,
: and string(<entry>) remains "hello" so all text-field query assertions still hold.
:)
declare variable $t:XML := document { <entry vec="1.0 0.0 0.0 0.0"><form><orth>hello</orth></form></entry> };

declare variable $t:xconf :=
<collection xmlns="http://exist-db.org/collection-config/1.0">
<index>
<lucene>
<module uri="http://exist-db.org/xquery/lucene/field-expr-ctx-lib"
prefix="idx" at="field-expr-ctx-lib.xql"/>
<text qname="entry">
<!-- baselines: worked before fix -->
<field name="xdot" expression="."/>
<field name="xname" expression="self::entry"/>
<field name="xchildren" expression="*"/>
<field name="xdotchildren" expression="./*"/>
<!-- self::* regressions (#6446) -->
<field name="xself" expression="self::*"/>
<field name="xdotself" expression="./self::*"/>
<field name="xdotselfchild" expression="./self::*/form"/>
<!-- element()-typed module call regressions (#6446) -->
<field name="mdot" expression="idx:get-metadata(., 'x')"/>
<field name="mname" expression="idx:get-metadata(self::entry, 'x')"/>
<field name="mself" expression="idx:get-metadata(self::*, 'x')"/>
<field name="mdotself" expression="idx:get-metadata(./self::*, 'x')"/>
<field name="mdotselfchild" expression="idx:get-metadata(./self::*/form, 'x')"/>
<field name="mchildren" expression="idx:get-metadata(*, 'x')"/>
<field name="mdotchildren" expression="idx:get-metadata(./*, 'x')"/>
<!-- vector-field expression context regressions (#6446) -->
<!-- @vec is a dim=4 float attribute; self::*/@vec exercises the element-type
check that was broken: if the context NodeProxy had UNKNOWN_NODE_TYPE,
self::* returned empty, the attribute step had no context, and no vector
was indexed. -->
<vector-field name="v_baseline" expression="@vec" dimension="4" similarity="cosine" encoding="text"/>
<vector-field name="v_self" expression="self::*/@vec" dimension="4" similarity="cosine" encoding="text"/>
</text>
</lucene>
</index>
</collection>;

(:~
 : Test fixture set-up: creates the test collection and its configuration
 : collection, stores the library module, the collection configuration and the
 : test document, and finally reindexes the test collection.
 :)
declare %test:setUp function t:setup() {
(: create the /db/system/config/db hierarchy that holds collection configurations :)
let $_ := (xmldb:create-collection("/db/system", "config"),
xmldb:create-collection("/db/system/config", "db"))
(: create the data collection and the configuration collection that mirrors it :)
let $_ := (xmldb:create-collection("/db", substring-after($t:COLL, "/db/")),
xmldb:create-collection("/db/system/config/db", substring-after($t:COLL, "/db/")))
return (
(: the library module is stored under the file name the collection.xconf <module at="..."> refers to :)
xmldb:store($t:COLL, "field-expr-ctx-lib.xql", $t:MODULE, "application/xquery"),
(: the configuration is stored before the test document;
   presumably so the document is indexed with it on store - TODO confirm :)
xmldb:store($t:CONF_COLL, "collection.xconf", $t:xconf),
xmldb:store($t:COLL, "test.xml", $t:XML),
(: explicit reindex of the test collection after everything has been stored :)
xmldb:reindex($t:COLL)
)
};

(:~
 : Test fixture tear-down: removes the test collection and then its
 : configuration collection, skipping whichever of the two is not available.
 :)
declare %test:tearDown function t:tearDown() {
    for $path in ($t:COLL, $t:CONF_COLL)
    where xmldb:collection-available($path)
    return xmldb:remove($path)
};

(:~
 : Checks whether at least one entry element in the test collection matches the
 : term "hello" in the named Lucene field.
 :
 : @param $field name of the Lucene field to query
 : @return true if the field query selects any entry element, false otherwise
 :)
declare function t:indexed($field as xs:string) as xs:boolean {
    let $query := $field || ":(hello)"
    let $hits := collection($t:COLL)//entry[ft:query(., $query)]
    return exists($hits)
};

(: --- baselines: field expressions (., self::entry, *, ./*) that the collection
   configuration marks as having worked before the #6446 fix --- :)
declare %test:assertTrue function t:xdot() { t:indexed("xdot") };
declare %test:assertTrue function t:xname() { t:indexed("xname") };
declare %test:assertTrue function t:xchildren() { t:indexed("xchildren") };
declare %test:assertTrue function t:xdotchildren() { t:indexed("xdotchildren") };

(: --- self::* expression regressions (#6446): fields whose expression goes
   through a wildcard self step (self::*, ./self::*, ./self::*/form) must be indexed --- :)
declare %test:assertTrue function t:xself() { t:indexed("xself") };
declare %test:assertTrue function t:xdotself() { t:indexed("xdotself") };
declare %test:assertTrue function t:xdotselfchild() { t:indexed("xdotselfchild") };

(: --- element()-typed module call regressions (#6446): each field passes a node
   selected from the context to idx:get-metadata, whose first parameter is
   declared as element(); the field must still be indexed --- :)
declare %test:assertTrue function t:mdot() { t:indexed("mdot") };
declare %test:assertTrue function t:mname() { t:indexed("mname") };
declare %test:assertTrue function t:mself() { t:indexed("mself") };
declare %test:assertTrue function t:mdotself() { t:indexed("mdotself") };
declare %test:assertTrue function t:mdotselfchild() { t:indexed("mdotselfchild") };
declare %test:assertTrue function t:mchildren() { t:indexed("mchildren") };
declare %test:assertTrue function t:mdotchildren() { t:indexed("mdotchildren") };

(:~ --- vector-field expression context regressions (#6446) --- :)
(:~
 : Checks whether a vector query against the named vector field selects at
 : least one entry element in the test collection.
 :
 : The probe vector has the same components as the @vec attribute of the test
 : entry. The final argument of ft:query-field-vector is passed as 1;
 : presumably the number of nearest neighbours requested - TODO confirm.
 :
 : @param $field name of the vector field to query
 : @return true if the vector query selects any entry element, false otherwise
 :)
declare function t:vector-indexed($field as xs:string) as xs:boolean {
    let $probe := [1.0, 0.0, 0.0, 0.0]
    let $hits := collection($t:COLL)//entry[ft:query-field-vector($field, $probe, 1)]
    return exists($hits)
};

(: v_baseline reads @vec directly from the context node; v_self reaches it through
   self::*/@vec, the wildcard self step covered by the #6446 regression :)
declare %test:assertTrue function t:vector-baseline() { t:vector-indexed("v_baseline") };
declare %test:assertTrue function t:vector-xself() { t:vector-indexed("v_self") };
Loading
Loading