Skip to content
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ private Sequence evalHighlightFieldMatches(final Sequence[] args) throws XPathEx
continue;
}
final Sequence fieldValues = getFieldValues(fieldName, Type.STRING, ownerDoc.getDocId(), proxy.getNodeId(), index);
final Sequence highlighted = highlightMatches(fieldName, proxy, match, fieldValues);
final Sequence highlighted = highlightMatches(context, fieldName, proxy, match, fieldValues);
for (final SequenceIterator hi = highlighted.iterate(); hi.hasNext(); ) {
result.add(hi.nextItem());
}
Expand Down Expand Up @@ -220,6 +220,7 @@ private Sequence getFieldValues(final String fieldName, final int type, final in
/**
* Highlight matches in field content using the analyzer defined for the field.
*
* @param context the query context (for the broker and the in-memory result builder)
* @param fieldName the name of the field
* @param proxy node on which the field is defined
* @param match the lucene match attached to the node
Expand All @@ -228,7 +229,7 @@ private Sequence getFieldValues(final String fieldName, final int type, final in
* @throws XPathException in case of error
* @throws IOException in case of a lucene error
*/
private Sequence highlightMatches(final String fieldName, final NodeProxy proxy, final LuceneMatch match, final Sequence text) throws XPathException, IOException {
static Sequence highlightMatches(final XQueryContext context, final String fieldName, final NodeProxy proxy, final LuceneMatch match, final Sequence text) throws XPathException, IOException {
final LuceneIndexWorker index = (LuceneIndexWorker) context.getBroker().getIndexController().getWorkerByIndexId(LuceneIndex.ID);
final Map<Object, Query> terms = index.getTerms(match.getQuery());
final NodePath path = LuceneMatchListener.getPath(proxy);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,10 @@ public class LuceneModule extends AbstractInternalModule {
new FunctionDef(Search.signatures[0], Search.class),
new FunctionDef(Search.signatures[1], Search.class),
new FunctionDef(Search.signatures[2], Search.class),
new FunctionDef(QueryScope.signatures[0], QueryScope.class),
new FunctionDef(QueryScope.signatures[1], QueryScope.class),
new FunctionDef(SearchScope.signatures[0], SearchScope.class),
new FunctionDef(SearchScope.signatures[1], SearchScope.class),
new FunctionDef(GetField.signatures[0], GetField.class),
new FunctionDef(Facets.signatures[0], Facets.class),
new FunctionDef(Facets.signatures[1], Facets.class),
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
/*
* eXist-db Open Source Native XML Database
* Copyright (C) 2001 The eXist-db Authors
*
* info@exist-db.org
* http://www.exist-db.org
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
package org.exist.xquery.modules.lucene;

import org.apache.lucene.queryparser.classic.ParseException;
import org.exist.collections.Collection;
import org.exist.dom.persistent.DefaultDocumentSet;
import org.exist.dom.persistent.LockedDocument;
import org.exist.dom.persistent.MutableDocumentSet;
import org.exist.dom.persistent.NodeSet;
import org.exist.indexing.lucene.LuceneIndex;
import org.exist.indexing.lucene.LuceneIndexWorker;
import org.exist.security.PermissionDeniedException;
import org.exist.storage.lock.Lock.LockMode;
import org.exist.util.LockException;
import org.exist.xmldb.XmldbURI;
import org.exist.xquery.BasicFunction;
import org.exist.xquery.XPathException;
import org.exist.xquery.value.NodeValue;
import org.exist.xquery.value.Sequence;
import org.exist.xquery.value.SequenceIterator;
import org.exist.xquery.value.Type;
import org.w3c.dom.Element;

import java.io.IOException;

/**
 * Shared scope-resolution and index-first query execution for the collection-scoped Lucene functions
 * ({@link QueryScope} and {@link SearchScope}). Both resolve a sequence of collection/document URIs to
 * a {@code DocumentSet} and run the Lucene query directly over it — with a <b>null context set</b>
 * (index-first; no descendant-of constraint) and <b>null qnames</b> (all defined indexes) — so the
 * result is every matching indexed node, of any element type, carrying its score and matches.
 *
 * <p>All failures are reported as {@link XPathException}s that keep the underlying exception as their
 * cause, so the original stack trace is not lost.</p>
 */
final class LuceneScope {

    /** Static helper holder; never instantiated. */
    private LuceneScope() {
    }

    /**
     * Resolve a sequence of collection or document URIs to a document set. Collection URIs are searched
     * recursively (including sub-collections); a URI that is not a collection is tried as a single document.
     * A URI that resolves to neither contributes nothing to the result.
     *
     * @param fn    the calling function; supplies the query context (broker, protected docs) and is
     *              reported as the source expression of any error
     * @param scope the collection/document URIs, one per item (string value of each item is used)
     * @return the documents found under the given URIs; possibly empty, never {@code null}
     * @throws XPathException if a URI cannot be parsed, access is denied, or a lock cannot be acquired
     */
    static MutableDocumentSet resolveScope(final BasicFunction fn, final Sequence scope) throws XPathException {
        final MutableDocumentSet docs = new DefaultDocumentSet();
        for (final SequenceIterator i = scope.iterate(); i.hasNext(); ) {
            final String path = i.nextItem().getStringValue();

            // The path is caller-supplied text. XmldbURI.create is expected to reject malformed input
            // with an unchecked IllegalArgumentException (NOTE(review): confirm against XmldbURI);
            // surface that as a regular XQuery error instead of letting it escape the function.
            final XmldbURI uri;
            try {
                uri = XmldbURI.create(path);
            } catch (final IllegalArgumentException e) {
                throw new XPathException(fn, LuceneModule.EXXQDYFT0002,
                        "Invalid scope URI '" + path + "': " + e.getMessage(), scope, e);
            }

            try (final Collection coll = fn.getContext().getBroker().openCollection(uri, LockMode.READ_LOCK)) {
                if (coll != null) {
                    // 'true' => include documents of sub-collections
                    coll.allDocs(fn.getContext().getBroker(), docs, true, fn.getContext().getProtectedDocs());
                } else {
                    // not a collection: try it as a single document
                    try (final LockedDocument lockedDoc = fn.getContext().getBroker().getXMLResource(uri, LockMode.READ_LOCK)) {
                        if (lockedDoc != null) {
                            docs.add(lockedDoc.getDocument());
                        }
                    }
                }
            } catch (final PermissionDeniedException e) {
                throw new XPathException(fn, LuceneModule.EXXQDYFT0001,
                        "Permission denied to access '" + path + "'", scope, e);
            } catch (final LockException e) {
                throw new XPathException(fn, LuceneModule.EXXQDYFT0002,
                        "Lock error while accessing '" + path + "': " + e.getMessage(), scope, e);
            }
        }
        return docs;
    }

    /**
     * Run the index-first query over {@code docs}. {@code queryArg} is either a Lucene query string or an
     * XML query element (an empty query matches all indexed nodes in scope).
     *
     * @param fn              the calling function; supplies the broker and the expression id
     * @param contextSequence the caller's context sequence (currently not consulted by this method)
     * @param docs            the documents to search
     * @param queryArg        empty, a single element node (XML query), or a single item whose string
     *                        value is the Lucene query string
     * @param options         the query options to pass to the index
     * @return the matching nodes, each carrying its Lucene score and matches.
     * @throws XPathException if the index reports an I/O error or the query string cannot be parsed
     */
    static NodeSet query(final BasicFunction fn, final Sequence contextSequence, final MutableDocumentSet docs,
                         final Sequence queryArg, final QueryOptions options) throws XPathException {
        final LuceneIndexWorker index = (LuceneIndexWorker) fn.getContext().getBroker()
                .getIndexController().getWorkerByIndexId(LuceneIndex.ID);
        try {
            if (!queryArg.isEmpty() && Type.subTypeOf(queryArg.itemAt(0).getType(), Type.ELEMENT)) {
                final Element queryXml = (Element) ((NodeValue) queryArg.itemAt(0)).getNode();
                // null context set + null qnames: query the index directly, over all defined indexes
                return index.query(fn.getExpressionId(), docs, null, null, queryXml, NodeSet.DESCENDANT, options);
            } else {
                // a null query string stands for "no query given"
                final String query = queryArg.isEmpty() ? null : queryArg.itemAt(0).getStringValue();
                return index.query(fn.getExpressionId(), docs, null, null, query, NodeSet.DESCENDANT, options);
            }
        } catch (final IOException | ParseException e) {
            throw new XPathException(fn, LuceneModule.EXXQDYFT0002,
                    "Error while querying full text index: " + e.getMessage(), queryArg, e);
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
/*
* eXist-db Open Source Native XML Database
* Copyright (C) 2001 The eXist-db Authors
*
* info@exist-db.org
* http://www.exist-db.org
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
package org.exist.xquery.modules.lucene;

import org.exist.dom.QName;
import org.exist.dom.persistent.MutableDocumentSet;
import org.exist.xquery.BasicFunction;
import org.exist.xquery.Cardinality;
import org.exist.xquery.FunctionSignature;
import org.exist.xquery.XPathException;
import org.exist.xquery.XQueryContext;
import org.exist.xquery.value.FunctionParameterSequenceType;
import org.exist.xquery.value.FunctionReturnSequenceType;
import org.exist.xquery.value.Sequence;
import org.exist.xquery.value.SequenceType;
import org.exist.xquery.value.Type;

/**
 * Implementation of {@code ft:query-scope($scope, $query, $options?)}, an <em>index-first</em>
 * Lucene search.
 *
 * <p>Where {@code ft:query} evaluates relative to an XPath context node set, this function hands the
 * documents found under {@code $scope} straight to the Lucene index and returns <em>every</em> matching
 * node — of any indexed element type — with Lucene score and match highlighting attached, the same way
 * {@code ft:query} results carry them. Two consequences follow:</p>
 *
 * <ul>
 *   <li>relevance is correct for each hit no matter how deeply the matched element is nested: an XPath
 *       node set is never the query unit, so the {@code //*} descendant-wildcard {@code ft:score}-loss
 *       artifact does not arise;</li>
 *   <li>the search is independent of element names: contributing element types need not be enumerated
 *       or unioned, which keeps content producers decoupled from the search aggregator.</li>
 * </ul>
 *
 * <p>Because the result is an ordinary node set, {@code ft:score}, {@code ft:facets}, {@code ft:field}
 * and {@code ft:highlight-field-matches} compose on it as usual. This is the focused, live-node
 * primitive of the "eXlasticSearch" field-first search design; {@link SearchScope}
 * ({@code ft:search-scope}) is its detached, map-returning companion, which assembles the
 * Elasticsearch {@code _search}-style result map (total/hits/fields/facets).</p>
 */
public class QueryScope extends BasicFunction {

    /** {@code $scope}: the collection/document URIs to search. */
    private static final FunctionParameterSequenceType SCOPE_PARAM = new FunctionParameterSequenceType(
            "scope",
            Type.STRING,
            Cardinality.ZERO_OR_MORE,
            "Collection (or document) URIs to search. Collection URIs are searched recursively.");

    /** {@code $query}: a Lucene query string, an XML query element, or empty. */
    private static final FunctionParameterSequenceType QUERY_PARAM = new FunctionParameterSequenceType(
            "query",
            Type.ITEM,
            Cardinality.ZERO_OR_ONE,
            "The query: a string in Lucene's default query syntax (e.g. \"site-content:(array)\") "
                    + "or an XML query element. An empty query matches all indexed nodes in scope.");

    /** {@code $options}: only present in the 3-argument signature. */
    private static final FunctionParameterSequenceType OPTIONS_PARAM = new FunctionParameterSequenceType(
            "options",
            Type.ITEM,
            Cardinality.EXACTLY_ONE,
            "Query options as an XML fragment or an XDM map (same options as ft:query, including facet drill-down).");

    /** Return type shared by both signatures. */
    private static final FunctionReturnSequenceType RESULT_TYPE = new FunctionReturnSequenceType(
            Type.NODE,
            Cardinality.ZERO_OR_MORE,
            "All indexed nodes in scope matching the query, each carrying its Lucene score "
                    + "(via ft:score) and match information.");

    /** The 2-argument and 3-argument forms of {@code ft:query-scope}, in that order. */
    public static final FunctionSignature[] signatures = {
            signatureOf(
                    "Index-first Lucene search over a scope of collections/documents. Returns all matching "
                            + "indexed nodes (any element type) with scores attached, avoiding the XPath node-set "
                            + "scoring artifacts of ft:query over a descendant-axis wildcard.",
                    SCOPE_PARAM, QUERY_PARAM),
            signatureOf(
                    "Index-first Lucene search over a scope of collections/documents, with query options. "
                            + "Returns all matching indexed nodes (any element type) with scores attached.",
                    SCOPE_PARAM, QUERY_PARAM, OPTIONS_PARAM)
    };

    public QueryScope(final XQueryContext context, final FunctionSignature signature) {
        super(context, signature);
    }

    /**
     * Builds one {@code query-scope} signature in the Lucene module namespace.
     *
     * @param description the human-readable function description
     * @param parameters  the parameter types, in declaration order
     * @return a signature with the given parameters and the shared return type
     */
    private static FunctionSignature signatureOf(final String description, final SequenceType... parameters) {
        final QName functionName = new QName("query-scope", LuceneModule.NAMESPACE_URI, LuceneModule.PREFIX);
        return new FunctionSignature(functionName, description, parameters, RESULT_TYPE);
    }

    /**
     * Resolves {@code $scope} to a document set and runs the query over it.
     *
     * @param args            {@code [scope, query]} or {@code [scope, query, options]}
     * @param contextSequence the current context sequence, forwarded to option parsing and the query
     * @return the matching nodes, or the empty sequence when the scope is empty or contains no documents
     * @throws XPathException if scope resolution, option parsing or the query itself fails
     */
    @Override
    public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException {
        final Sequence scopeUris = args[0];
        if (!scopeUris.isEmpty()) {
            final MutableDocumentSet scopedDocs = LuceneScope.resolveScope(this, scopeUris);
            if (scopedDocs.getDocumentCount() != 0) {
                // Options sit at 1-based argument position 3 (scope, query, options), as in ft:query.
                // Per the original author's note, parseOptions falls back to default QueryOptions when
                // getArgumentCount() < 3, so the 2-argument form never touches a missing argument.
                final QueryOptions queryOptions = Query.parseOptions(this, contextSequence, null, 3);
                final Sequence queryArg = args[1];
                return LuceneScope.query(this, contextSequence, scopedDocs, queryArg, queryOptions);
            }
        }
        // nothing to search: empty scope, or scope resolved to zero documents
        return Sequence.EMPTY_SEQUENCE;
    }
}
Loading
Loading