|
18 | 18 |
|
19 | 19 | import java.io.IOException;
|
20 | 20 | import java.util.ArrayList;
|
| 21 | +import java.util.Arrays; |
21 | 22 | import java.util.Date;
|
22 | 23 | import java.util.EnumSet;
|
23 | 24 | import java.util.HashMap;
|
24 | 25 | import java.util.List;
|
25 | 26 | import java.util.Map;
|
26 | 27 |
|
| 28 | +import org.apache.lucene.index.DocValues; |
| 29 | +import org.apache.lucene.index.LeafReaderContext; |
| 30 | +import org.apache.lucene.index.NumericDocValues; |
27 | 31 | import org.apache.lucene.search.Query;
|
| 32 | +import org.apache.lucene.search.SimpleCollector; |
28 | 33 | import org.apache.lucene.util.NumericUtils;
|
29 | 34 | import org.apache.solr.common.SolrException;
|
30 | 35 | import org.apache.solr.common.params.FacetParams;
|
|
35 | 40 | import org.apache.solr.schema.TrieDateField;
|
36 | 41 | import org.apache.solr.schema.TrieField;
|
37 | 42 | import org.apache.solr.search.DocSet;
|
| 43 | +import org.apache.solr.search.DocSetBuilder; |
| 44 | +import org.apache.solr.search.DocSetUtil; |
38 | 45 | import org.apache.solr.util.DateMathParser;
|
39 | 46 |
|
40 | 47 | import static org.apache.solr.search.facet.FacetContext.SKIP_FACET;
|
41 | 48 |
|
42 | 49 | public class FacetRange extends FacetRequestSorted {
|
| 50 | + |
| 51 | + public enum FacetMethod { |
| 52 | + DV, // Does a single pass using DocValues to sift into buckets |
| 53 | + ENUM, // Uses a RangeQuery for each bucket |
| 54 | + ; |
| 55 | + |
| 56 | + public static FacetRange.FacetMethod fromString(String method) { |
| 57 | + if (method == null || method.length() == 0) return ENUM; |
| 58 | + switch (method) { |
| 59 | + case "dv": |
| 60 | + return DV; |
| 61 | + case "enum": |
| 62 | + return ENUM; |
| 63 | + default: |
| 64 | + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Unknown FacetRange method " + method); |
| 65 | + } |
| 66 | + } |
| 67 | + } |
| 68 | + |
| 69 | + |
43 | 70 | String field;
|
44 | 71 | Object start;
|
45 | 72 | Object end;
|
46 | 73 | Object gap;
|
47 | 74 | boolean hardend = false;
|
48 | 75 | EnumSet<FacetParams.FacetRangeInclude> include;
|
49 | 76 | EnumSet<FacetParams.FacetRangeOther> others;
|
| 77 | + FacetMethod method; |
50 | 78 |
|
51 | 79 | {
|
52 | 80 | // defaults
|
@@ -114,6 +142,26 @@ public Range(Object label, Comparable low, Comparable high, boolean includeLower
|
114 | 142 | this.includeLower = includeLower;
|
115 | 143 | this.includeUpper = includeUpper;
|
116 | 144 | }
|
| 145 | + |
| 146 | + public boolean contains(Comparable val) { |
| 147 | + if (low != null) { |
| 148 | + if (includeLower && val.compareTo(low) < 0) { |
| 149 | + return false; |
| 150 | + } else if (!includeLower && val.compareTo(low) <= 0) { |
| 151 | + return false; |
| 152 | + } |
| 153 | + } |
| 154 | + |
| 155 | + if (high != null) { |
| 156 | + if (includeUpper && val.compareTo(high) > 0) { |
| 157 | + return false; |
| 158 | + } else if (!includeUpper && val.compareTo(high) >= 0) { |
| 159 | + return false; |
| 160 | + } |
| 161 | + } |
| 162 | + |
| 163 | + return true; |
| 164 | + } |
117 | 165 | }
|
118 | 166 |
|
119 | 167 | public static Calc getNumericCalc(SchemaField sf) {
|
@@ -304,14 +352,26 @@ private SimpleOrderedMap getRangeCountsIndexed() throws IOException {
|
304 | 352 |
|
305 | 353 | createAccs(fcontext.base.size(), slotCount);
|
306 | 354 |
|
| 355 | + FacetRangeMethod rangeMethod; |
| 356 | + if (freq.method == FacetRange.FacetMethod.DV) { |
| 357 | + if (!sf.hasDocValues() || sf.multiValued()) { |
| 358 | + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, |
| 359 | + "Facet range method " + freq.method + " only works for single valued numeric fields with docValues"); |
| 360 | + } |
| 361 | + rangeMethod = new FacetRangeByDocValues(); |
| 362 | + } else { |
| 363 | + rangeMethod = new FacetRangeByQuery(); |
| 364 | + } |
| 365 | + |
307 | 366 | for (int idx = 0; idx<rangeList.size(); idx++) {
|
308 |
| - rangeStats(rangeList.get(idx), idx); |
| 367 | + rangeMethod.processRange(rangeList.get(idx), idx); |
309 | 368 | }
|
310 | 369 |
|
311 | 370 | for (int idx = 0; idx<otherList.size(); idx++) {
|
312 |
| - rangeStats(otherList.get(idx), rangeList.size() + idx); |
| 371 | + rangeMethod.processRange(otherList.get(idx), rangeList.size() + idx); |
313 | 372 | }
|
314 | 373 |
|
| 374 | + rangeMethod.finish(); |
315 | 375 |
|
316 | 376 | final SimpleOrderedMap res = new SimpleOrderedMap<>();
|
317 | 377 | List<SimpleOrderedMap> buckets = new ArrayList<>();
|
@@ -341,14 +401,9 @@ private SimpleOrderedMap getRangeCountsIndexed() throws IOException {
|
341 | 401 |
|
342 | 402 | private Query[] filters;
|
343 | 403 | private DocSet[] intersections;
|
344 |
| - private void rangeStats(Range range, int slot) throws IOException { |
345 |
| - Query rangeQ = sf.getType().getRangeQuery(null, sf, range.low == null ? null : calc.formatValue(range.low), range.high==null ? null : calc.formatValue(range.high), range.includeLower, range.includeUpper); |
346 |
| - // TODO: specialize count only |
347 |
| - DocSet intersection = fcontext.searcher.getDocSet(rangeQ, fcontext.base); |
348 |
| - filters[slot] = rangeQ; |
349 |
| - intersections[slot] = intersection; // save for later // TODO: only save if number of slots is small enough? |
350 |
| - int num = collect(intersection, slot); |
351 |
| - countAcc.incrementCount(slot, num); // TODO: roll this into collect() |
| 404 | + |
| 405 | + private Query buildRangeQuery(Range range) { |
| 406 | + return sf.getType().getRangeQuery(null, sf, range.low == null ? null : calc.formatValue(range.low), range.high==null ? null : calc.formatValue(range.high), range.includeLower, range.includeUpper); |
352 | 407 | }
|
353 | 408 |
|
354 | 409 | private void doSubs(SimpleOrderedMap bucket, int slot) throws IOException {
|
@@ -378,8 +433,118 @@ private SimpleOrderedMap<Object> rangeStats(Range range, boolean special ) thro
|
378 | 433 | return bucket;
|
379 | 434 | }
|
380 | 435 |
|
| 436 | + abstract class FacetRangeMethod { |
| 437 | + void processRange(Range range, int slot) throws IOException { |
| 438 | + filters[slot] = buildRangeQuery(range); |
| 439 | + doOneRange(range, slot); |
| 440 | + } |
| 441 | + abstract void doOneRange(Range range, int slot) throws IOException; |
| 442 | + abstract void finish() throws IOException; |
| 443 | + } |
381 | 444 |
|
| 445 | + // Gathers the stats for each Range bucket by using a RangeQuery to run a search. |
| 446 | + // Suitable when the number of buckets is fairly low, or the base DocSet is big |
| 447 | + class FacetRangeByQuery extends FacetRangeMethod { |
382 | 448 |
|
| 449 | + @Override |
| 450 | + void doOneRange(Range range, int slot) throws IOException { |
| 451 | + // TODO: specialize count only |
| 452 | + intersections[slot] = fcontext.searcher.getDocSet(filters[slot], fcontext.base); |
| 453 | + int num = collect(intersections[slot], slot); |
| 454 | + countAcc.incrementCount(slot, num); // TODO: roll this into collect() |
| 455 | + } |
| 456 | + |
| 457 | + @Override |
| 458 | + void finish() throws IOException { } |
| 459 | + } |
| 460 | + |
| 461 | + // Gathers the stats by making a single pass over the base DocSet, using |
| 462 | + // the docValue for the field to sift into the appropriate Range buckets. |
| 463 | + // Suitable when the gap leads to many interval buckets, especially if this is a |
| 464 | + // subfacet inside a parent with many buckets of its own. However, this method |
| 465 | + // can be slower if the base DocSet is big |
| 466 | + class FacetRangeByDocValues extends FacetRangeMethod { |
| 467 | + |
| 468 | + private DocSetBuilder[] builders; |
| 469 | + private Comparable[] starts; |
| 470 | + |
| 471 | + FacetRangeByDocValues() { |
| 472 | + builders = new DocSetBuilder[intersections.length]; |
| 473 | + starts = new Comparable[rangeList.size()]; |
| 474 | + } |
| 475 | + |
| 476 | + @Override |
| 477 | + void doOneRange(Range range, int slot) throws IOException { |
| 478 | + builders[slot] = new DocSetBuilder(fcontext.searcher.maxDoc(), fcontext.base.size() >> 2); |
| 479 | + if (slot < starts.length) { |
| 480 | + starts[slot] = range.low; |
| 481 | + } |
| 482 | + } |
| 483 | + |
| 484 | + @Override |
| 485 | + void finish() throws IOException { |
| 486 | + DocSetUtil.collectSortedDocSet(fcontext.base, fcontext.searcher.getIndexReader(), new SimpleCollector() { |
| 487 | + int docBase; |
| 488 | + NumericDocValues values = null; |
| 489 | + |
| 490 | + @Override |
| 491 | + public boolean needsScores() { |
| 492 | + return false; |
| 493 | + } |
| 494 | + |
| 495 | + @Override |
| 496 | + protected void doSetNextReader(LeafReaderContext ctx) throws IOException { |
| 497 | + docBase = ctx.docBase; |
| 498 | + values = DocValues.getNumeric(ctx.reader(), sf.getName()); |
| 499 | + } |
| 500 | + |
| 501 | + @Override |
| 502 | + public void collect(int segDoc) throws IOException { |
| 503 | + if (values.advanceExact(segDoc)) { |
| 504 | + placeDocId(values.longValue(), docBase + segDoc); |
| 505 | + } |
| 506 | + } |
| 507 | + } |
| 508 | + ); |
| 509 | + |
| 510 | + for (int slot = 0; slot<builders.length; slot++) { |
| 511 | + intersections[slot] = builders[slot].buildUniqueInOrder(null); |
| 512 | + int num = collect(intersections[slot], slot); |
| 513 | + countAcc.incrementCount(slot, num); |
| 514 | + } |
| 515 | + } |
| 516 | + |
| 517 | + void placeDocId(long val, int docId) { |
| 518 | + Comparable comparableVal = calc.bitsToValue(val); |
| 519 | + |
| 520 | + int insertionPoint = Arrays.binarySearch(starts, comparableVal); |
| 521 | + |
| 522 | + int slot; |
| 523 | + if (insertionPoint >= 0) { |
| 524 | + if (rangeList.get(insertionPoint).includeLower) { |
| 525 | + slot = insertionPoint; |
| 526 | + } else { |
| 527 | + slot = insertionPoint - 1; |
| 528 | + } |
| 529 | + } else { |
| 530 | + slot = -(insertionPoint + 2); // See docs for binarySearch return value |
| 531 | + } |
| 532 | + |
| 533 | + if (slot >= 0 && slot < rangeList.size() && |
| 534 | + rangeList.get(slot).contains(comparableVal)) { // It could be out of range |
| 535 | + builders[slot].add(docId); |
| 536 | + } |
| 537 | + |
| 538 | + // Also add to any relevant Ranges in the otherList |
| 539 | + slot = rangeList.size(); |
| 540 | + for (Range range : otherList) { |
| 541 | + if (range.contains(comparableVal)) { |
| 542 | + builders[slot].add(docId); |
| 543 | + } |
| 544 | + slot++; |
| 545 | + } |
| 546 | + } |
| 547 | + } |
383 | 548 |
|
384 | 549 | // Essentially copied from SimpleFacets...
|
385 | 550 | // would be nice to unify this stuff w/ analytics component...
|
|
0 commit comments