Skip to content

Commit 9115175

Browse files
authored
Merge pull request #6454 from joewiz/bugfix/lucene-facet-drilldown-highlight
[bugfix] Preserve ft:highlight-field-matches under facet drill-down
2 parents 96883fc + 32f8353 commit 9115175

2 files changed

Lines changed: 139 additions & 2 deletions

File tree

extensions/indexes/lucene/src/main/java/org/exist/indexing/lucene/LuceneUtil.java

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
import org.apache.lucene.index.Term;
3737
import org.apache.lucene.index.Terms;
3838
import org.apache.lucene.index.TermsEnum;
39+
import org.apache.lucene.queries.function.FunctionScoreQuery;
3940
import org.apache.lucene.search.*;
4041
import org.apache.lucene.util.AttributeSource;
4142
import org.apache.lucene.util.BytesRef;
@@ -163,6 +164,8 @@ public static void extractTerms(final Query query, final Map<Object, Query> term
163164
extractTermsFromTermRange(termRangeQuery, terms, reader, includeFields);
164165
case DrillDownQuery drillDownQuery ->
165166
extractTermsFromDrillDown(drillDownQuery, terms, reader, includeFields);
167+
case FunctionScoreQuery functionScoreQuery ->
168+
extractTerms(functionScoreQuery.getWrappedQuery(), terms, reader, includeFields);
166169
case null, default -> {
167170
query.visit(new QueryVisitor() {
168171
@Override
@@ -181,8 +184,11 @@ public void consumeTerms(Query query, Term... termsArray) {
181184
}
182185

183186
private static void extractTermsFromDrillDown(DrillDownQuery query, Map<Object, Query> terms, IndexReader reader, boolean includeFields) throws IOException {
184-
final Query rewritten = query.rewrite(new IndexSearcher(reader));
185-
extractTerms(rewritten, terms, reader, includeFields);
187+
// Extract terms from the base (content) query only. Rewriting a DrillDownQuery expands it
188+
// into a BooleanQuery that also carries the internal dimension-filter clauses (e.g.
189+
// $facets:kind$para), whose terms don't appear in document text and prevent correct
190+
// highlight extraction. getBaseQuery() returns the content query directly.
191+
extractTerms(query.getBaseQuery(), terms, reader, includeFields);
186192
}
187193

188194
private static void extractTermsFromBoolean(final BooleanQuery query, final Map<Object, Query> terms, final IndexReader reader, final boolean includeFields) throws IOException {
Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
(:
2+
: eXist-db Open Source Native XML Database
3+
: Copyright (C) 2001 The eXist-db Authors
4+
:
5+
: info@exist-db.org
6+
: http://www.exist-db.org
7+
:
8+
: This library is free software; you can redistribute it and/or
9+
: modify it under the terms of the GNU Lesser General Public
10+
: License as published by the Free Software Foundation; either
11+
: version 2.1 of the License, or (at your option) any later version.
12+
:
13+
: This library is distributed in the hope that it will be useful,
14+
: but WITHOUT ANY WARRANTY; without even the implied warranty of
15+
: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16+
: Lesser General Public License for more details.
17+
:
18+
: You should have received a copy of the GNU Lesser General Public
19+
: License along with this library; if not, write to the Free Software
20+
: Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21+
:)
22+
xquery version "3.1";
23+
24+
(:~
25+
: Regression test: facet drill-down must not disable ft:highlight-field-matches.
26+
:
27+
: A faceted ft:query wraps the content query in a Lucene DrillDownQuery. Highlight term extraction
28+
: used to rewrite that query, which pulled the internal facet-dimension filter terms in alongside
29+
: the content terms and silently produced empty highlights for every faceted search. The fix
30+
: extracts terms from the drill-down's base (content) query only; the search still drills down.
31+
:)
32+
module namespace fdh = "http://exist-db.org/xquery/lucene/test/facet-drilldown-highlight";
33+
34+
declare namespace test = "http://exist-db.org/xquery/xqsuite";
35+
declare namespace exist = "http://exist.sourceforge.net/NS/exist";
36+
37+
import module namespace ft = "http://exist-db.org/xquery/lucene";
38+
39+
declare variable $fdh:COLLECTION := "/db/lucene-test-facet-highlight";
40+
declare variable $fdh:CONFIG := "/db/system/config/db/lucene-test-facet-highlight";
41+
42+
declare variable $fdh:XCONF :=
43+
<collection xmlns="http://exist-db.org/collection-config/1.0">
44+
<index>
45+
<lucene>
46+
<analyzer class="org.apache.lucene.analysis.standard.StandardAnalyzer"/>
47+
<text qname="para">
48+
<field name="content" expression="."/>
49+
<facet dimension="kind" expression="'para'"/>
50+
</text>
51+
<text qname="caption">
52+
<field name="content" expression="."/>
53+
<facet dimension="kind" expression="'caption'"/>
54+
</text>
55+
</lucene>
56+
</index>
57+
</collection>;
58+
59+
declare variable $fdh:DOC1 :=
60+
<article>
61+
<section><para>The eXist-db array functions let you map and filter array members.</para></section>
62+
<figure><caption>An array diagram</caption></figure>
63+
</article>;
64+
65+
declare variable $fdh:DOC2 :=
66+
<article>
67+
<section><para>map:merge combines maps; array and map are XDM types.</para></section>
68+
</article>;
69+
70+
declare
71+
%test:setUp
72+
function fdh:setup() {
73+
let $_ := (xmldb:create-collection("/db/system", "config"), xmldb:create-collection("/db/system/config", "db"))
74+
let $conf := xmldb:create-collection("/db/system/config/db", "lucene-test-facet-highlight")
75+
let $col := xmldb:create-collection("/db", "lucene-test-facet-highlight")
76+
return (
77+
xmldb:store($conf, "collection.xconf", $fdh:XCONF),
78+
xmldb:store($col, "doc1.xml", $fdh:DOC1),
79+
xmldb:store($col, "doc2.xml", $fdh:DOC2),
80+
xmldb:reindex($col)
81+
)
82+
};
83+
84+
declare
85+
%test:tearDown
86+
function fdh:tearDown() {
87+
if (xmldb:collection-available($fdh:COLLECTION)) then xmldb:remove($fdh:COLLECTION) else (),
88+
if (xmldb:collection-available($fdh:CONFIG)) then xmldb:remove($fdh:CONFIG) else ()
89+
};
90+
91+
(: control: a plain (unfaceted) query highlights fine :)
92+
declare
93+
%test:assertTrue
94+
function fdh:plain-query-highlights() {
95+
let $hit := (collection($fdh:COLLECTION)//para[ft:query(., "content:(array)")])[1]
96+
return exists(ft:highlight-field-matches($hit, "content")//exist:match)
97+
};
98+
99+
(: the regression: a facet drill-down query must STILL highlight (was empty before the fix) :)
100+
declare
101+
%test:assertTrue
102+
function fdh:faceted-query-highlights() {
103+
let $opts := map { "facets": map { "kind": "para" } }
104+
let $hit := (collection($fdh:COLLECTION)//para[ft:query(., "content:(array)", $opts)])[1]
105+
return exists(ft:highlight-field-matches($hit, "content")//exist:match)
106+
};
107+
108+
(: the highlighted term is the queried term, under drill-down :)
109+
declare
110+
%test:assertTrue
111+
function fdh:faceted-highlight-marks-the-term() {
112+
let $opts := map { "facets": map { "kind": "para" } }
113+
let $hit := (collection($fdh:COLLECTION)//para[ft:query(., "content:(array)", $opts)])[1]
114+
return lower-case(string(ft:highlight-field-matches($hit, "content")//exist:match[1])) = "array"
115+
};
116+
117+
(: sanity: facet drill-down still SELECTS by facet value (kind=caption keeps the caption hit) :)
118+
declare
119+
%test:assertEquals(1)
120+
function fdh:drilldown-selects-facet() {
121+
let $opts := map { "facets": map { "kind": "caption" } }
122+
return count(collection($fdh:COLLECTION)//caption[ft:query(., "content:(array)", $opts)])
123+
};
124+
125+
(: sanity: facet drill-down still EXCLUDES other facet values (kind=para drops the caption) :)
126+
declare
127+
%test:assertEquals(0)
128+
function fdh:drilldown-excludes-other-facet() {
129+
let $opts := map { "facets": map { "kind": "para" } }
130+
return count(collection($fdh:COLLECTION)//caption[ft:query(., "content:(array)", $opts)])
131+
};

0 commit comments

Comments
 (0)