Skip to content

Commit 8aae4dd

Browse files
committed
Merge branch 'termfreqfreq' into bitmapfrequency
2 parents f682d1a + 62849e9 commit 8aae4dd

File tree

5 files changed

+101
-38
lines changed

5 files changed

+101
-38
lines changed

solr/core/src/java/org/apache/solr/search/ValueSourceParser.java

+33-1
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,39 @@
3333
import org.apache.lucene.queries.function.docvalues.BoolDocValues;
3434
import org.apache.lucene.queries.function.docvalues.DoubleDocValues;
3535
import org.apache.lucene.queries.function.docvalues.LongDocValues;
36-
import org.apache.lucene.queries.function.valuesource.*;
36+
import org.apache.lucene.queries.function.valuesource.ConstNumberSource;
37+
import org.apache.lucene.queries.function.valuesource.ConstValueSource;
38+
import org.apache.lucene.queries.function.valuesource.DefFunction;
39+
import org.apache.lucene.queries.function.valuesource.DivFloatFunction;
40+
import org.apache.lucene.queries.function.valuesource.DocFreqValueSource;
41+
import org.apache.lucene.queries.function.valuesource.DoubleConstValueSource;
42+
import org.apache.lucene.queries.function.valuesource.DualFloatFunction;
43+
import org.apache.lucene.queries.function.valuesource.IDFValueSource;
44+
import org.apache.lucene.queries.function.valuesource.IfFunction;
45+
import org.apache.lucene.queries.function.valuesource.JoinDocFreqValueSource;
46+
import org.apache.lucene.queries.function.valuesource.LinearFloatFunction;
47+
import org.apache.lucene.queries.function.valuesource.LiteralValueSource;
48+
import org.apache.lucene.queries.function.valuesource.MaxDocValueSource;
49+
import org.apache.lucene.queries.function.valuesource.MaxFloatFunction;
50+
import org.apache.lucene.queries.function.valuesource.MinFloatFunction;
51+
import org.apache.lucene.queries.function.valuesource.MultiBoolFunction;
52+
import org.apache.lucene.queries.function.valuesource.MultiValueSource;
53+
import org.apache.lucene.queries.function.valuesource.NormValueSource;
54+
import org.apache.lucene.queries.function.valuesource.NumDocsValueSource;
55+
import org.apache.lucene.queries.function.valuesource.ProductFloatFunction;
56+
import org.apache.lucene.queries.function.valuesource.QueryValueSource;
57+
import org.apache.lucene.queries.function.valuesource.RangeMapFloatFunction;
58+
import org.apache.lucene.queries.function.valuesource.ReciprocalFloatFunction;
59+
import org.apache.lucene.queries.function.valuesource.ScaleFloatFunction;
60+
import org.apache.lucene.queries.function.valuesource.SimpleBoolFunction;
61+
import org.apache.lucene.queries.function.valuesource.SimpleFloatFunction;
62+
import org.apache.lucene.queries.function.valuesource.SingleFunction;
63+
import org.apache.lucene.queries.function.valuesource.SumFloatFunction;
64+
import org.apache.lucene.queries.function.valuesource.SumTotalTermFreqValueSource;
65+
import org.apache.lucene.queries.function.valuesource.TFValueSource;
66+
import org.apache.lucene.queries.function.valuesource.TermFreqValueSource;
67+
import org.apache.lucene.queries.function.valuesource.TotalTermFreqValueSource;
68+
import org.apache.lucene.queries.function.valuesource.VectorValueSource;
3769
import org.apache.lucene.queries.payloads.PayloadDecoder;
3870
import org.apache.lucene.queries.payloads.PayloadFunction;
3971
import org.apache.lucene.search.IndexSearcher;
solr/core/src/java/org/apache/solr/search/facet/TermFrequencyCounter.java

+48-15
Original file line numberDiff line numberDiff line change
@@ -1,41 +1,74 @@
11
package org.apache.solr.search.facet;
22

33
import java.util.HashMap;
4+
import java.util.LinkedHashMap;
45
import java.util.Map;
56
import java.util.stream.Collectors;
67

8+
import org.apache.solr.common.util.NamedList;
79
import org.apache.solr.common.util.SimpleOrderedMap;
810

911
public class TermFrequencyCounter {
10-
private final Map<String, Integer> counters;
12+
private final Map<String, Integer> counts;
13+
private boolean overflow;
1114

1215
public TermFrequencyCounter() {
13-
this.counters = new HashMap<>();
16+
this.counts = new HashMap<>();
1417
}
1518

16-
public Map<String, Integer> getCounters() {
17-
return this.counters;
19+
public Map<String, Integer> getCounts() {
20+
return this.counts;
1821
}
1922

2023
public void add(String value) {
21-
counters.merge(value, 1, Integer::sum);
24+
counts.merge(value, 1, Integer::sum);
2225
}
2326

24-
public Map<String, Integer> serialize(int limit) {
25-
if (limit < Integer.MAX_VALUE && limit < counters.size()) {
26-
return counters.entrySet()
27-
.stream()
28-
.sorted((l, r) -> r.getValue() - l.getValue()) // sort by value descending
29-
.limit(limit)
30-
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
27+
public SimpleOrderedMap<Object> serialize(int limit) {
28+
SimpleOrderedMap<Object> result = new SimpleOrderedMap<>();
29+
30+
if (limit < counts.size()) {
31+
result.add("counts", getTopCounts(counts, limit));
32+
result.add("overflow", Boolean.TRUE);
3133
} else {
32-
return counters;
34+
result.add("counts", counts);
35+
result.add("overflow", Boolean.FALSE);
3336
}
37+
38+
return result;
39+
}
40+
41+
private Map<String, Integer> getTopCounts(Map<String, Integer> counters, int limit) {
42+
return counters.entrySet()
43+
.stream()
44+
.sorted((l, r) -> r.getValue() - l.getValue()) // sort by value descending
45+
.limit(limit)
46+
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
3447
}
3548

36-
public TermFrequencyCounter merge(Map<String, Integer> serialized) {
37-
serialized.forEach((value, freq) -> counters.merge(value, freq, Integer::sum));
49+
public TermFrequencyCounter merge(NamedList<Object> serialized) {
50+
final Map<String, Integer> counts = (Map<String, Integer>) serialized.get("counts");
51+
if (counts != null) {
52+
counts.forEach((value, freq) -> this.counts.merge(value, freq, Integer::sum));
53+
}
54+
55+
final Boolean overflow = (Boolean) serialized.get("overflow");
56+
if (overflow != null) {
57+
this.overflow = this.overflow || overflow;
58+
}
3859

3960
return this;
4061
}
62+
63+
public SimpleOrderedMap<Object> toFrequencyOfFrequencies() {
64+
SimpleOrderedMap<Object> result = new SimpleOrderedMap<>();
65+
66+
Map<Integer, Integer> frequencies = new LinkedHashMap<>();
67+
counts.forEach((value, freq) -> frequencies.merge(freq, 1, Integer::sum));
68+
69+
result.add("frequencies", frequencies);
70+
result.add("overflow", overflow);
71+
72+
return result;
73+
}
4174
}

solr/core/src/java/org/apache/solr/search/facet/TermFrequencyOfFrequenciesAgg.java

+5-13
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,5 @@
11
package org.apache.solr.search.facet;
22

3-
import java.util.LinkedHashMap;
4-
import java.util.Map;
5-
63
import org.apache.lucene.queries.function.ValueSource;
74
import org.apache.solr.common.util.SimpleOrderedMap;
85
import org.apache.solr.search.FunctionQParser;
@@ -25,7 +22,7 @@ public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) {
2522

2623
@Override
2724
public FacetMerger createFacetMerger(Object prototype) {
28-
return new Merger(termLimit);
25+
return new Merger();
2926
}
3027

3128
public static class Parser extends ValueSourceParser {
@@ -45,14 +42,14 @@ public ValueSource parse(FunctionQParser fp) throws SyntaxError {
4542
private static class Merger extends FacetMerger {
4643
private final TermFrequencyCounter result;
4744

48-
public Merger(int termLimit) {
45+
public Merger() {
4946
this.result = new TermFrequencyCounter();
5047
}
5148

5249
@Override
5350
public void merge(Object facetResult, Context mcontext) {
54-
if (facetResult instanceof Map) {
55-
result.merge((Map<String, Integer>) facetResult);
51+
if (facetResult instanceof SimpleOrderedMap) {
52+
result.merge((SimpleOrderedMap<Object>) facetResult);
5653
}
5754
}
5855

@@ -63,12 +60,7 @@ public void finish(Context mcontext) {
6360

6461
@Override
6562
public Object getMergedResult() {
66-
Map<Integer, Integer> map = new LinkedHashMap<>();
67-
68-
result.getCounters()
69-
.forEach((value, freq) -> map.merge(freq, 1, Integer::sum));
70-
71-
return map;
63+
return result.toFrequencyOfFrequencies();
7264
}
7365
}
7466
}

solr/core/src/java/org/apache/solr/search/facet/TermFrequencySlotAcc.java

+12-4
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,10 @@
22

33
import java.io.IOException;
44
import java.util.Arrays;
5-
import java.util.Collections;
65
import java.util.function.IntFunction;
76

87
import org.apache.lucene.queries.function.ValueSource;
8+
import org.apache.solr.common.util.SimpleOrderedMap;
99

1010
public class TermFrequencySlotAcc extends FuncSlotAcc {
1111
private TermFrequencyCounter[] result;
@@ -33,10 +33,18 @@ public int compare(int slotA, int slotB) {
3333

3434
@Override
3535
public Object getValue(int slotNum) {
36-
if (result[slotNum] != null) {
37-
return result[slotNum].serialize(termLimit);
36+
if (fcontext.isShard()) {
37+
if (result[slotNum] != null) {
38+
return result[slotNum].serialize(termLimit);
39+
} else {
40+
return new SimpleOrderedMap<>();
41+
}
3842
} else {
39-
return Collections.emptyList();
43+
if (result[slotNum] != null) {
44+
return result[slotNum].toFrequencyOfFrequencies();
45+
} else {
46+
return new SimpleOrderedMap<>();
47+
}
4048
}
4149
}
4250

solr/core/src/test/org/apache/solr/search/facet/TermFrequencyCounterTest.java

+3-5
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,7 @@
66
import java.io.InputStream;
77
import java.util.HashMap;
88
import java.util.Map;
9-
import java.util.Random;
109

11-
import com.carrotsearch.randomizedtesting.annotations.Seed;
1210
import org.apache.lucene.util.LuceneTestCase;
1311
import org.apache.solr.common.util.JavaBinCodec;
1412
import org.apache.solr.common.util.SimpleOrderedMap;
@@ -104,7 +102,7 @@ private static void assertCount(TermFrequencyCounter counter, String value, int
104102
assertEquals(
105103
"value " + value + " should have count " + count,
106104
count,
107-
(int) counter.getCounters().getOrDefault(value, 0)
105+
(int) counter.getCounts().getOrDefault(value, 0)
108106
);
109107
}
110108

@@ -116,7 +114,7 @@ private static TermFrequencyCounter serdeser(TermFrequencyCounter counter, int l
116114

117115
InputStream in = new ByteArrayInputStream(out.toByteArray());
118116
counter = new TermFrequencyCounter();
119-
counter.merge((Map<String, Integer>) codec.unmarshal(in));
117+
counter.merge((SimpleOrderedMap<Object>) codec.unmarshal(in));
120118

121119
return counter;
122120
}
@@ -132,7 +130,7 @@ private static TermFrequencyCounter merge(
132130
codec.marshal(toMerge.serialize(limit), out);
133131

134132
InputStream in = new ByteArrayInputStream(out.toByteArray());
135-
counter.merge((Map<String, Integer>) codec.unmarshal(in));
133+
counter.merge((SimpleOrderedMap<Object>) codec.unmarshal(in));
136134

137135
return counter;
138136
}

0 commit comments

Comments
 (0)