Skip to content

Commit 564fea4

Browse files
committed
Add a method:dv option to the JSON range facet, which should be faster for large numbers of buckets
1 parent 3cf1da0 commit 564fea4

File tree

2 files changed

+176
-10
lines changed

2 files changed

+176
-10
lines changed

solr/core/src/java/org/apache/solr/search/facet/FacetRange.java

+175-10
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,18 @@
1818

1919
import java.io.IOException;
2020
import java.util.ArrayList;
21+
import java.util.Arrays;
2122
import java.util.Date;
2223
import java.util.EnumSet;
2324
import java.util.HashMap;
2425
import java.util.List;
2526
import java.util.Map;
2627

28+
import org.apache.lucene.index.DocValues;
29+
import org.apache.lucene.index.LeafReaderContext;
30+
import org.apache.lucene.index.NumericDocValues;
2731
import org.apache.lucene.search.Query;
32+
import org.apache.lucene.search.SimpleCollector;
2833
import org.apache.lucene.util.NumericUtils;
2934
import org.apache.solr.common.SolrException;
3035
import org.apache.solr.common.params.FacetParams;
@@ -38,19 +43,42 @@
3843
import org.apache.solr.schema.TrieDateField;
3944
import org.apache.solr.schema.TrieField;
4045
import org.apache.solr.search.DocSet;
46+
import org.apache.solr.search.DocSetBuilder;
47+
import org.apache.solr.search.DocSetUtil;
4148
import org.apache.solr.search.facet.SlotAcc.SlotContext;
4249
import org.apache.solr.util.DateMathParser;
4350

4451
import static org.apache.solr.search.facet.FacetContext.SKIP_FACET;
4552

4653
public class FacetRange extends FacetRequestSorted {
54+
55+
public enum FacetMethod {
56+
DV, // Does a single pass using DocValues to sift into buckets
57+
ENUM, // Uses a RangeQuery for each bucket
58+
;
59+
60+
public static FacetRange.FacetMethod fromString(String method) {
61+
if (method == null || method.length() == 0) return ENUM;
62+
switch (method) {
63+
case "dv":
64+
return DV;
65+
case "enum":
66+
return ENUM;
67+
default:
68+
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Unknown FacetRange method " + method);
69+
}
70+
}
71+
}
72+
73+
4774
String field;
4875
Object start;
4976
Object end;
5077
Object gap;
5178
boolean hardend = false;
5279
EnumSet<FacetParams.FacetRangeInclude> include;
5380
EnumSet<FacetParams.FacetRangeOther> others;
81+
FacetMethod method;
5482

5583
{
5684
// defaults
@@ -122,6 +150,26 @@ public Range(Object label, Comparable low, Comparable high, boolean includeLower
122150
this.includeLower = includeLower;
123151
this.includeUpper = includeUpper;
124152
}
153+
154+
public boolean contains(Comparable val) {
155+
if (low != null) {
156+
if (includeLower && val.compareTo(low) < 0) {
157+
return false;
158+
} else if (!includeLower && val.compareTo(low) <= 0) {
159+
return false;
160+
}
161+
}
162+
163+
if (high != null) {
164+
if (includeUpper && val.compareTo(high) > 0) {
165+
return false;
166+
} else if (!includeUpper && val.compareTo(high) >= 0) {
167+
return false;
168+
}
169+
}
170+
171+
return true;
172+
}
125173
}
126174

127175
/**
@@ -322,14 +370,26 @@ private SimpleOrderedMap getRangeCountsIndexed() throws IOException {
322370

323371
createAccs(fcontext.base.size(), slotCount);
324372

373+
FacetRangeMethod rangeMethod;
374+
if (freq.method == FacetRange.FacetMethod.DV) {
375+
if (!sf.hasDocValues() || sf.multiValued()) {
376+
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
377+
"Facet range method " + freq.method + " only works for single valued numeric fields with docValues");
378+
}
379+
rangeMethod = new FacetRangeByDocValues();
380+
} else {
381+
rangeMethod = new FacetRangeByQuery();
382+
}
383+
325384
for (int idx = 0; idx<rangeList.size(); idx++) {
326-
rangeStats(rangeList.get(idx), idx);
385+
rangeMethod.processRange(rangeList.get(idx), idx);
327386
}
328387

329388
for (int idx = 0; idx<otherList.size(); idx++) {
330-
rangeStats(otherList.get(idx), rangeList.size() + idx);
389+
rangeMethod.processRange(otherList.get(idx), rangeList.size() + idx);
331390
}
332391

392+
rangeMethod.finish();
333393

334394
final SimpleOrderedMap res = new SimpleOrderedMap<>();
335395
List<SimpleOrderedMap> buckets = new ArrayList<>();
@@ -359,14 +419,9 @@ private SimpleOrderedMap getRangeCountsIndexed() throws IOException {
359419

360420
private Query[] filters;
361421
private DocSet[] intersections;
362-
private void rangeStats(Range range, int slot) throws IOException {
363-
Query rangeQ = sf.getType().getRangeQuery(null, sf, range.low == null ? null : calc.formatValue(range.low), range.high==null ? null : calc.formatValue(range.high), range.includeLower, range.includeUpper);
364-
// TODO: specialize count only
365-
DocSet intersection = fcontext.searcher.getDocSet(rangeQ, fcontext.base);
366-
filters[slot] = rangeQ;
367-
intersections[slot] = intersection; // save for later // TODO: only save if number of slots is small enough?
368-
int num = collect(intersection, slot, slotNum -> { return new SlotContext(rangeQ); });
369-
countAcc.incrementCount(slot, num); // TODO: roll this into collect()
422+
423+
private Query buildRangeQuery(Range range) {
424+
return sf.getType().getRangeQuery(null, sf, range.low == null ? null : calc.formatValue(range.low), range.high==null ? null : calc.formatValue(range.high), range.includeLower, range.includeUpper);
370425
}
371426

372427
private void doSubs(SimpleOrderedMap bucket, int slot) throws IOException {
@@ -396,8 +451,118 @@ private SimpleOrderedMap<Object> rangeStats(Range range, boolean special ) thro
396451
return bucket;
397452
}
398453

454+
abstract class FacetRangeMethod {
455+
void processRange(Range range, int slot) throws IOException {
456+
filters[slot] = buildRangeQuery(range);
457+
doOneRange(range, slot);
458+
}
459+
abstract void doOneRange(Range range, int slot) throws IOException;
460+
abstract void finish() throws IOException;
461+
}
399462

463+
// Gathers the stats for each Range bucket by using a RangeQuery to run a search.
464+
// Suitable when the number of buckets is fairly low, or the base DocSet is big
465+
class FacetRangeByQuery extends FacetRangeMethod {
400466

467+
@Override
468+
void doOneRange(Range range, int slot) throws IOException {
469+
// TODO: specialize count only
470+
intersections[slot] = fcontext.searcher.getDocSet(filters[slot], fcontext.base);
471+
int num = collect(intersections[slot], slot, slotNum -> { return new SlotContext(filters[slotNum]); });
472+
countAcc.incrementCount(slot, num); // TODO: roll this into collect()
473+
}
474+
475+
@Override
476+
void finish() throws IOException { }
477+
}
478+
479+
// Gathers the stats by making a single pass over the base DocSet, using
480+
// the docValue for the field to sift into the appropriate Range buckets.
481+
// Suitable when the gap leads to many interval buckets, especially if this is a
482+
// subfacet inside a parent with many buckets of its own. However, this method
483+
// can be slower if the base DocSet is big
484+
class FacetRangeByDocValues extends FacetRangeMethod {
485+
486+
private DocSetBuilder[] builders;
487+
private Comparable[] starts;
488+
489+
FacetRangeByDocValues() {
490+
builders = new DocSetBuilder[intersections.length];
491+
starts = new Comparable[rangeList.size()];
492+
}
493+
494+
@Override
495+
void doOneRange(Range range, int slot) throws IOException {
496+
builders[slot] = new DocSetBuilder(fcontext.searcher.maxDoc(), fcontext.base.size() >> 2);
497+
if (slot < starts.length) {
498+
starts[slot] = range.low;
499+
}
500+
}
501+
502+
@Override
503+
void finish() throws IOException {
504+
DocSetUtil.collectSortedDocSet(fcontext.base, fcontext.searcher.getIndexReader(), new SimpleCollector() {
505+
int docBase;
506+
NumericDocValues values = null;
507+
508+
@Override
509+
public boolean needsScores() {
510+
return false;
511+
}
512+
513+
@Override
514+
protected void doSetNextReader(LeafReaderContext ctx) throws IOException {
515+
docBase = ctx.docBase;
516+
values = DocValues.getNumeric(ctx.reader(), sf.getName());
517+
}
518+
519+
@Override
520+
public void collect(int segDoc) throws IOException {
521+
if (values.advanceExact(segDoc)) {
522+
placeDocId(values.longValue(), docBase + segDoc);
523+
}
524+
}
525+
}
526+
);
527+
528+
for (int slot = 0; slot<builders.length; slot++) {
529+
intersections[slot] = builders[slot].buildUniqueInOrder(null);
530+
int num = collect(intersections[slot], slot, slotNum -> { return new SlotContext(filters[slotNum]); });
531+
countAcc.incrementCount(slot, num);
532+
}
533+
}
534+
535+
void placeDocId(long val, int docId) {
536+
Comparable comparableVal = calc.bitsToValue(val);
537+
538+
int insertionPoint = Arrays.binarySearch(starts, comparableVal);
539+
540+
int slot;
541+
if (insertionPoint >= 0) {
542+
if (rangeList.get(insertionPoint).includeLower) {
543+
slot = insertionPoint;
544+
} else {
545+
slot = insertionPoint - 1;
546+
}
547+
} else {
548+
slot = -(insertionPoint + 2); // See docs for binarySearch return value
549+
}
550+
551+
if (slot >= 0 && slot < rangeList.size() &&
552+
rangeList.get(slot).contains(comparableVal)) { // It could be out of range
553+
builders[slot].add(docId);
554+
}
555+
556+
// Also add to any relevant Ranges in the otherList
557+
slot = rangeList.size();
558+
for (Range range : otherList) {
559+
if (range.contains(comparableVal)) {
560+
builders[slot].add(docId);
561+
}
562+
slot++;
563+
}
564+
}
565+
}
401566

402567
// Essentially copied from SimpleFacets...
403568
// would be nice to unify this stuff w/ analytics component...

solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java

+1
Original file line numberDiff line numberDiff line change
@@ -871,6 +871,7 @@ public FacetRange parse(Object arg) throws SyntaxError {
871871
facet.gap = m.get("gap");
872872
facet.hardend = getBoolean(m, "hardend", facet.hardend);
873873
facet.mincount = getLong(m, "mincount", 0);
874+
facet.method = FacetRange.FacetMethod.fromString(getString(m, "method", null));
874875

875876
// TODO: refactor list-of-options code
876877

0 commit comments

Comments
 (0)