Skip to content

Commit 1aede48

Browse files
committed
Add a method:dv to json Range Facet which should be faster for large numbers of buckets
1 parent d217b2e commit 1aede48

File tree

2 files changed

+176
-10
lines changed

2 files changed

+176
-10
lines changed

solr/core/src/java/org/apache/solr/search/facet/FacetRange.java

+175-10
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,18 @@
1818

1919
import java.io.IOException;
2020
import java.util.ArrayList;
21+
import java.util.Arrays;
2122
import java.util.Date;
2223
import java.util.EnumSet;
2324
import java.util.HashMap;
2425
import java.util.List;
2526
import java.util.Map;
2627

28+
import org.apache.lucene.index.DocValues;
29+
import org.apache.lucene.index.LeafReaderContext;
30+
import org.apache.lucene.index.NumericDocValues;
2731
import org.apache.lucene.search.Query;
32+
import org.apache.lucene.search.SimpleCollector;
2833
import org.apache.lucene.util.NumericUtils;
2934
import org.apache.solr.common.SolrException;
3035
import org.apache.solr.common.params.FacetParams;
@@ -35,18 +40,41 @@
3540
import org.apache.solr.schema.TrieDateField;
3641
import org.apache.solr.schema.TrieField;
3742
import org.apache.solr.search.DocSet;
43+
import org.apache.solr.search.DocSetBuilder;
44+
import org.apache.solr.search.DocSetUtil;
3845
import org.apache.solr.util.DateMathParser;
3946

4047
import static org.apache.solr.search.facet.FacetContext.SKIP_FACET;
4148

4249
public class FacetRange extends FacetRequestSorted {
50+
51+
public enum FacetMethod {
52+
DV, // Does a single pass using DocValues to sift into buckets
53+
ENUM, // Uses a RangeQuery for each bucket
54+
;
55+
56+
public static FacetRange.FacetMethod fromString(String method) {
57+
if (method == null || method.length() == 0) return ENUM;
58+
switch (method) {
59+
case "dv":
60+
return DV;
61+
case "enum":
62+
return ENUM;
63+
default:
64+
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Unknown FacetRange method " + method);
65+
}
66+
}
67+
}
68+
69+
4370
String field;
4471
Object start;
4572
Object end;
4673
Object gap;
4774
boolean hardend = false;
4875
EnumSet<FacetParams.FacetRangeInclude> include;
4976
EnumSet<FacetParams.FacetRangeOther> others;
77+
FacetMethod method;
5078

5179
{
5280
// defaults
@@ -114,6 +142,26 @@ public Range(Object label, Comparable low, Comparable high, boolean includeLower
114142
this.includeLower = includeLower;
115143
this.includeUpper = includeUpper;
116144
}
145+
146+
public boolean contains(Comparable val) {
147+
if (low != null) {
148+
if (includeLower && val.compareTo(low) < 0) {
149+
return false;
150+
} else if (!includeLower && val.compareTo(low) <= 0) {
151+
return false;
152+
}
153+
}
154+
155+
if (high != null) {
156+
if (includeUpper && val.compareTo(high) > 0) {
157+
return false;
158+
} else if (!includeUpper && val.compareTo(high) >= 0) {
159+
return false;
160+
}
161+
}
162+
163+
return true;
164+
}
117165
}
118166

119167
public static Calc getNumericCalc(SchemaField sf) {
@@ -304,14 +352,26 @@ private SimpleOrderedMap getRangeCountsIndexed() throws IOException {
304352

305353
createAccs(fcontext.base.size(), slotCount);
306354

355+
FacetRangeMethod rangeMethod;
356+
if (freq.method == FacetRange.FacetMethod.DV) {
357+
if (!sf.hasDocValues() || sf.multiValued()) {
358+
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
359+
"Facet range method " + freq.method + " only works for single valued numeric fields with docValues");
360+
}
361+
rangeMethod = new FacetRangeByDocValues();
362+
} else {
363+
rangeMethod = new FacetRangeByQuery();
364+
}
365+
307366
for (int idx = 0; idx<rangeList.size(); idx++) {
308-
rangeStats(rangeList.get(idx), idx);
367+
rangeMethod.processRange(rangeList.get(idx), idx);
309368
}
310369

311370
for (int idx = 0; idx<otherList.size(); idx++) {
312-
rangeStats(otherList.get(idx), rangeList.size() + idx);
371+
rangeMethod.processRange(otherList.get(idx), rangeList.size() + idx);
313372
}
314373

374+
rangeMethod.finish();
315375

316376
final SimpleOrderedMap res = new SimpleOrderedMap<>();
317377
List<SimpleOrderedMap> buckets = new ArrayList<>();
@@ -341,14 +401,9 @@ private SimpleOrderedMap getRangeCountsIndexed() throws IOException {
341401

342402
private Query[] filters;
343403
private DocSet[] intersections;
344-
private void rangeStats(Range range, int slot) throws IOException {
345-
Query rangeQ = sf.getType().getRangeQuery(null, sf, range.low == null ? null : calc.formatValue(range.low), range.high==null ? null : calc.formatValue(range.high), range.includeLower, range.includeUpper);
346-
// TODO: specialize count only
347-
DocSet intersection = fcontext.searcher.getDocSet(rangeQ, fcontext.base);
348-
filters[slot] = rangeQ;
349-
intersections[slot] = intersection; // save for later // TODO: only save if number of slots is small enough?
350-
int num = collect(intersection, slot);
351-
countAcc.incrementCount(slot, num); // TODO: roll this into collect()
404+
405+
private Query buildRangeQuery(Range range) {
406+
return sf.getType().getRangeQuery(null, sf, range.low == null ? null : calc.formatValue(range.low), range.high==null ? null : calc.formatValue(range.high), range.includeLower, range.includeUpper);
352407
}
353408

354409
private void doSubs(SimpleOrderedMap bucket, int slot) throws IOException {
@@ -378,8 +433,118 @@ private SimpleOrderedMap<Object> rangeStats(Range range, boolean special ) thro
378433
return bucket;
379434
}
380435

436+
abstract class FacetRangeMethod {
437+
void processRange(Range range, int slot) throws IOException {
438+
filters[slot] = buildRangeQuery(range);
439+
doOneRange(range, slot);
440+
}
441+
abstract void doOneRange(Range range, int slot) throws IOException;
442+
abstract void finish() throws IOException;
443+
}
381444

445+
// Gathers the stats for each Range bucket by using a RangeQuery to run a search.
446+
// Suitable when the number of buckets is fairly low, or the base DocSet is big
447+
class FacetRangeByQuery extends FacetRangeMethod {
382448

449+
@Override
450+
void doOneRange(Range range, int slot) throws IOException {
451+
// TODO: specialize count only
452+
intersections[slot] = fcontext.searcher.getDocSet(filters[slot], fcontext.base);
453+
int num = collect(intersections[slot], slot);
454+
countAcc.incrementCount(slot, num); // TODO: roll this into collect()
455+
}
456+
457+
@Override
458+
void finish() throws IOException { }
459+
}
460+
461+
// Gathers the stats by making a single pass over the base DocSet, using
462+
// the docValue for the field to sift into the appropriate Range buckets.
463+
// Suitable when the gap leads to many interval buckets, especially if this is a
464+
// subfacet inside a parent with many buckets of its own. However, this method
465+
// can be slower if the base DocSet is big
466+
class FacetRangeByDocValues extends FacetRangeMethod {
467+
468+
private DocSetBuilder[] builders;
469+
private Comparable[] starts;
470+
471+
FacetRangeByDocValues() {
472+
builders = new DocSetBuilder[intersections.length];
473+
starts = new Comparable[rangeList.size()];
474+
}
475+
476+
@Override
477+
void doOneRange(Range range, int slot) throws IOException {
478+
builders[slot] = new DocSetBuilder(fcontext.searcher.maxDoc(), fcontext.base.size() >> 2);
479+
if (slot < starts.length) {
480+
starts[slot] = range.low;
481+
}
482+
}
483+
484+
@Override
485+
void finish() throws IOException {
486+
DocSetUtil.collectSortedDocSet(fcontext.base, fcontext.searcher.getIndexReader(), new SimpleCollector() {
487+
int docBase;
488+
NumericDocValues values = null;
489+
490+
@Override
491+
public boolean needsScores() {
492+
return false;
493+
}
494+
495+
@Override
496+
protected void doSetNextReader(LeafReaderContext ctx) throws IOException {
497+
docBase = ctx.docBase;
498+
values = DocValues.getNumeric(ctx.reader(), sf.getName());
499+
}
500+
501+
@Override
502+
public void collect(int segDoc) throws IOException {
503+
if (values.advanceExact(segDoc)) {
504+
placeDocId(values.longValue(), docBase + segDoc);
505+
}
506+
}
507+
}
508+
);
509+
510+
for (int slot = 0; slot<builders.length; slot++) {
511+
intersections[slot] = builders[slot].buildUniqueInOrder(null);
512+
int num = collect(intersections[slot], slot);
513+
countAcc.incrementCount(slot, num);
514+
}
515+
}
516+
517+
void placeDocId(long val, int docId) {
518+
Comparable comparableVal = calc.bitsToValue(val);
519+
520+
int insertionPoint = Arrays.binarySearch(starts, comparableVal);
521+
522+
int slot;
523+
if (insertionPoint >= 0) {
524+
if (rangeList.get(insertionPoint).includeLower) {
525+
slot = insertionPoint;
526+
} else {
527+
slot = insertionPoint - 1;
528+
}
529+
} else {
530+
slot = -(insertionPoint + 2); // See docs for binarySearch return value
531+
}
532+
533+
if (slot >= 0 && slot < rangeList.size() &&
534+
rangeList.get(slot).contains(comparableVal)) { // It could be out of range
535+
builders[slot].add(docId);
536+
}
537+
538+
// Also add to any relevant Ranges in the otherList
539+
slot = rangeList.size();
540+
for (Range range : otherList) {
541+
if (range.contains(comparableVal)) {
542+
builders[slot].add(docId);
543+
}
544+
slot++;
545+
}
546+
}
547+
}
383548

384549
// Essentially copied from SimpleFacets...
385550
// would be nice to unify this stuff w/ analytics component...

solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java

+1
Original file line numberDiff line numberDiff line change
@@ -783,6 +783,7 @@ public FacetRange parse(Object arg) throws SyntaxError {
783783
facet.gap = m.get("gap");
784784
facet.hardend = getBoolean(m, "hardend", facet.hardend);
785785
facet.mincount = getLong(m, "mincount", 0);
786+
facet.method = FacetRange.FacetMethod.fromString(getString(m, "method", null));
786787

787788
// TODO: refactor list-of-options code
788789

0 commit comments

Comments
 (0)