|
18 | 18 |
|
19 | 19 | import java.io.IOException;
|
20 | 20 | import java.util.ArrayList;
|
| 21 | +import java.util.Arrays; |
21 | 22 | import java.util.Date;
|
22 | 23 | import java.util.EnumSet;
|
23 | 24 | import java.util.HashMap;
|
24 | 25 | import java.util.List;
|
25 | 26 | import java.util.Map;
|
26 | 27 |
|
| 28 | +import org.apache.lucene.index.DocValues; |
| 29 | +import org.apache.lucene.index.LeafReaderContext; |
| 30 | +import org.apache.lucene.index.NumericDocValues; |
27 | 31 | import org.apache.lucene.search.Query;
|
| 32 | +import org.apache.lucene.search.SimpleCollector; |
28 | 33 | import org.apache.lucene.util.NumericUtils;
|
29 | 34 | import org.apache.solr.common.SolrException;
|
30 | 35 | import org.apache.solr.common.params.FacetParams;
|
|
38 | 43 | import org.apache.solr.schema.TrieDateField;
|
39 | 44 | import org.apache.solr.schema.TrieField;
|
40 | 45 | import org.apache.solr.search.DocSet;
|
| 46 | +import org.apache.solr.search.DocSetBuilder; |
| 47 | +import org.apache.solr.search.DocSetUtil; |
41 | 48 | import org.apache.solr.search.facet.SlotAcc.SlotContext;
|
42 | 49 | import org.apache.solr.util.DateMathParser;
|
43 | 50 |
|
44 | 51 | import static org.apache.solr.search.facet.FacetContext.SKIP_FACET;
|
45 | 52 |
|
46 | 53 | public class FacetRange extends FacetRequestSorted {
|
| 54 | + |
| 55 | + public enum FacetMethod { |
| 56 | + DV, // Does a single pass using DocValues to sift into buckets |
| 57 | + ENUM, // Uses a RangeQuery for each bucket |
| 58 | + ; |
| 59 | + |
| 60 | + public static FacetRange.FacetMethod fromString(String method) { |
| 61 | + if (method == null || method.length() == 0) return ENUM; |
| 62 | + switch (method) { |
| 63 | + case "dv": |
| 64 | + return DV; |
| 65 | + case "enum": |
| 66 | + return ENUM; |
| 67 | + default: |
| 68 | + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Unknown FacetRange method " + method); |
| 69 | + } |
| 70 | + } |
| 71 | + } |
| 72 | + |
| 73 | + |
47 | 74 | String field;
|
48 | 75 | Object start;
|
49 | 76 | Object end;
|
50 | 77 | Object gap;
|
51 | 78 | boolean hardend = false;
|
52 | 79 | EnumSet<FacetParams.FacetRangeInclude> include;
|
53 | 80 | EnumSet<FacetParams.FacetRangeOther> others;
|
| 81 | + FacetMethod method; |
54 | 82 |
|
55 | 83 | {
|
56 | 84 | // defaults
|
@@ -122,6 +150,26 @@ public Range(Object label, Comparable low, Comparable high, boolean includeLower
|
122 | 150 | this.includeLower = includeLower;
|
123 | 151 | this.includeUpper = includeUpper;
|
124 | 152 | }
|
| 153 | + |
| 154 | + public boolean contains(Comparable val) { |
| 155 | + if (low != null) { |
| 156 | + if (includeLower && val.compareTo(low) < 0) { |
| 157 | + return false; |
| 158 | + } else if (!includeLower && val.compareTo(low) <= 0) { |
| 159 | + return false; |
| 160 | + } |
| 161 | + } |
| 162 | + |
| 163 | + if (high != null) { |
| 164 | + if (includeUpper && val.compareTo(high) > 0) { |
| 165 | + return false; |
| 166 | + } else if (!includeUpper && val.compareTo(high) >= 0) { |
| 167 | + return false; |
| 168 | + } |
| 169 | + } |
| 170 | + |
| 171 | + return true; |
| 172 | + } |
125 | 173 | }
|
126 | 174 |
|
127 | 175 | /**
|
@@ -322,14 +370,26 @@ private SimpleOrderedMap getRangeCountsIndexed() throws IOException {
|
322 | 370 |
|
323 | 371 | createAccs(fcontext.base.size(), slotCount);
|
324 | 372 |
|
| 373 | + FacetRangeMethod rangeMethod; |
| 374 | + if (freq.method == FacetRange.FacetMethod.DV) { |
| 375 | + if (!sf.hasDocValues() || sf.multiValued()) { |
| 376 | + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, |
| 377 | + "Facet range method " + freq.method + " only works for single valued numeric fields with docValues"); |
| 378 | + } |
| 379 | + rangeMethod = new FacetRangeByDocValues(); |
| 380 | + } else { |
| 381 | + rangeMethod = new FacetRangeByQuery(); |
| 382 | + } |
| 383 | + |
325 | 384 | for (int idx = 0; idx<rangeList.size(); idx++) {
|
326 |
| - rangeStats(rangeList.get(idx), idx); |
| 385 | + rangeMethod.processRange(rangeList.get(idx), idx); |
327 | 386 | }
|
328 | 387 |
|
329 | 388 | for (int idx = 0; idx<otherList.size(); idx++) {
|
330 |
| - rangeStats(otherList.get(idx), rangeList.size() + idx); |
| 389 | + rangeMethod.processRange(otherList.get(idx), rangeList.size() + idx); |
331 | 390 | }
|
332 | 391 |
|
| 392 | + rangeMethod.finish(); |
333 | 393 |
|
334 | 394 | final SimpleOrderedMap res = new SimpleOrderedMap<>();
|
335 | 395 | List<SimpleOrderedMap> buckets = new ArrayList<>();
|
@@ -359,14 +419,9 @@ private SimpleOrderedMap getRangeCountsIndexed() throws IOException {
|
359 | 419 |
|
360 | 420 | private Query[] filters;
|
361 | 421 | private DocSet[] intersections;
|
362 |
| - private void rangeStats(Range range, int slot) throws IOException { |
363 |
| - Query rangeQ = sf.getType().getRangeQuery(null, sf, range.low == null ? null : calc.formatValue(range.low), range.high==null ? null : calc.formatValue(range.high), range.includeLower, range.includeUpper); |
364 |
| - // TODO: specialize count only |
365 |
| - DocSet intersection = fcontext.searcher.getDocSet(rangeQ, fcontext.base); |
366 |
| - filters[slot] = rangeQ; |
367 |
| - intersections[slot] = intersection; // save for later // TODO: only save if number of slots is small enough? |
368 |
| - int num = collect(intersection, slot, slotNum -> { return new SlotContext(rangeQ); }); |
369 |
| - countAcc.incrementCount(slot, num); // TODO: roll this into collect() |
| 422 | + |
| 423 | + private Query buildRangeQuery(Range range) { |
| 424 | + return sf.getType().getRangeQuery(null, sf, range.low == null ? null : calc.formatValue(range.low), range.high==null ? null : calc.formatValue(range.high), range.includeLower, range.includeUpper); |
370 | 425 | }
|
371 | 426 |
|
372 | 427 | private void doSubs(SimpleOrderedMap bucket, int slot) throws IOException {
|
@@ -396,8 +451,118 @@ private SimpleOrderedMap<Object> rangeStats(Range range, boolean special ) thro
|
396 | 451 | return bucket;
|
397 | 452 | }
|
398 | 453 |
|
| 454 | + abstract class FacetRangeMethod { |
| 455 | + void processRange(Range range, int slot) throws IOException { |
| 456 | + filters[slot] = buildRangeQuery(range); |
| 457 | + doOneRange(range, slot); |
| 458 | + } |
| 459 | + abstract void doOneRange(Range range, int slot) throws IOException; |
| 460 | + abstract void finish() throws IOException; |
| 461 | + } |
399 | 462 |
|
| 463 | + // Gathers the stats for each Range bucket by using a RangeQuery to run a search. |
| 464 | + // Suitable when the number of buckets is fairly low, or the base DocSet is big |
| 465 | + class FacetRangeByQuery extends FacetRangeMethod { |
400 | 466 |
|
| 467 | + @Override |
| 468 | + void doOneRange(Range range, int slot) throws IOException { |
| 469 | + // TODO: specialize count only |
| 470 | + intersections[slot] = fcontext.searcher.getDocSet(filters[slot], fcontext.base); |
| 471 | + int num = collect(intersections[slot], slot, slotNum -> { return new SlotContext(filters[slotNum]); }); |
| 472 | + countAcc.incrementCount(slot, num); // TODO: roll this into collect() |
| 473 | + } |
| 474 | + |
| 475 | + @Override |
| 476 | + void finish() throws IOException { } |
| 477 | + } |
| 478 | + |
| 479 | + // Gathers the stats by making a single pass over the base DocSet, using |
| 480 | + // the docValue for the field to sift into the appropriate Range buckets. |
| 481 | + // Suitable when the gap leads to many interval buckets, especially if this is a |
| 482 | + // subfacet inside a parent with many buckets of its own. However, this method |
| 483 | + // can be slower if the base DocSet is big |
| 484 | + class FacetRangeByDocValues extends FacetRangeMethod { |
| 485 | + |
| 486 | + private DocSetBuilder[] builders; |
| 487 | + private Comparable[] starts; |
| 488 | + |
| 489 | + FacetRangeByDocValues() { |
| 490 | + builders = new DocSetBuilder[intersections.length]; |
| 491 | + starts = new Comparable[rangeList.size()]; |
| 492 | + } |
| 493 | + |
| 494 | + @Override |
| 495 | + void doOneRange(Range range, int slot) throws IOException { |
| 496 | + builders[slot] = new DocSetBuilder(fcontext.searcher.maxDoc(), fcontext.base.size() >> 2); |
| 497 | + if (slot < starts.length) { |
| 498 | + starts[slot] = range.low; |
| 499 | + } |
| 500 | + } |
| 501 | + |
| 502 | + @Override |
| 503 | + void finish() throws IOException { |
| 504 | + DocSetUtil.collectSortedDocSet(fcontext.base, fcontext.searcher.getIndexReader(), new SimpleCollector() { |
| 505 | + int docBase; |
| 506 | + NumericDocValues values = null; |
| 507 | + |
| 508 | + @Override |
| 509 | + public boolean needsScores() { |
| 510 | + return false; |
| 511 | + } |
| 512 | + |
| 513 | + @Override |
| 514 | + protected void doSetNextReader(LeafReaderContext ctx) throws IOException { |
| 515 | + docBase = ctx.docBase; |
| 516 | + values = DocValues.getNumeric(ctx.reader(), sf.getName()); |
| 517 | + } |
| 518 | + |
| 519 | + @Override |
| 520 | + public void collect(int segDoc) throws IOException { |
| 521 | + if (values.advanceExact(segDoc)) { |
| 522 | + placeDocId(values.longValue(), docBase + segDoc); |
| 523 | + } |
| 524 | + } |
| 525 | + } |
| 526 | + ); |
| 527 | + |
| 528 | + for (int slot = 0; slot<builders.length; slot++) { |
| 529 | + intersections[slot] = builders[slot].buildUniqueInOrder(null); |
| 530 | + int num = collect(intersections[slot], slot, slotNum -> { return new SlotContext(filters[slotNum]); }); |
| 531 | + countAcc.incrementCount(slot, num); |
| 532 | + } |
| 533 | + } |
| 534 | + |
| 535 | + void placeDocId(long val, int docId) { |
| 536 | + Comparable comparableVal = calc.bitsToValue(val); |
| 537 | + |
| 538 | + int insertionPoint = Arrays.binarySearch(starts, comparableVal); |
| 539 | + |
| 540 | + int slot; |
| 541 | + if (insertionPoint >= 0) { |
| 542 | + if (rangeList.get(insertionPoint).includeLower) { |
| 543 | + slot = insertionPoint; |
| 544 | + } else { |
| 545 | + slot = insertionPoint - 1; |
| 546 | + } |
| 547 | + } else { |
| 548 | + slot = -(insertionPoint + 2); // See docs for binarySearch return value |
| 549 | + } |
| 550 | + |
| 551 | + if (slot >= 0 && slot < rangeList.size() && |
| 552 | + rangeList.get(slot).contains(comparableVal)) { // It could be out of range |
| 553 | + builders[slot].add(docId); |
| 554 | + } |
| 555 | + |
| 556 | + // Also add to any relevant Ranges in the otherList |
| 557 | + slot = rangeList.size(); |
| 558 | + for (Range range : otherList) { |
| 559 | + if (range.contains(comparableVal)) { |
| 560 | + builders[slot].add(docId); |
| 561 | + } |
| 562 | + slot++; |
| 563 | + } |
| 564 | + } |
| 565 | + } |
401 | 566 |
|
402 | 567 | // Essentially copied from SimpleFacets...
|
403 | 568 | // would be nice to unify this stuff w/ analytics component...
|
|
0 commit comments