Skip to content

Commit 2e49c19

Browse files
committed
Merge branch 'termfreqfreq' into bitmapfrequency
2 parents 04c2716 + 4885310 commit 2e49c19

File tree

13 files changed

+673
-14
lines changed

13 files changed

+673
-14
lines changed

solr/core/src/java/org/apache/solr/search/ValueSourceParser.java

+3
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@
6969
import org.apache.solr.search.facet.StddevAgg;
7070
import org.apache.solr.search.facet.SumAgg;
7171
import org.apache.solr.search.facet.SumsqAgg;
72+
import org.apache.solr.search.facet.TermFrequencyOfFrequenciesAgg;
7273
import org.apache.solr.search.facet.TopDocsAgg;
7374
import org.apache.solr.search.facet.UniqueAgg;
7475
import org.apache.solr.search.facet.UniqueBlockAgg;
@@ -1071,6 +1072,8 @@ public ValueSource parse(FunctionQParser fp) throws SyntaxError {
10711072

10721073
addParser("agg_bitmapfreqfreq64", new FrequencyOfFrequenciesAgg64.Parser());
10731074

1075+
addParser("agg_termfreqfreq", new TermFrequencyOfFrequenciesAgg.Parser());
1076+
10741077
addParser("childfield", new ChildFieldValueSourceParser());
10751078
}
10761079

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
package org.apache.solr.search.facet;
2+
3+
import java.util.HashMap;
4+
import java.util.Map;
5+
import java.util.stream.Collectors;
6+
7+
import org.apache.solr.common.util.SimpleOrderedMap;
8+
9+
/**
 * Counts how many times each distinct term has been added.
 *
 * <p>Counts are kept in a plain {@link HashMap} keyed by term. Additions and merges use
 * {@link Integer#sum(int, int)}, so a count silently wraps around on {@code int} overflow.
 * Instances perform no synchronization of their own.
 */
public class TermFrequencyCounter {
  private final Map<String, Integer> counters;

  /** Creates an empty counter. */
  public TermFrequencyCounter() {
    this.counters = new HashMap<>();
  }

  /**
   * Returns the live term-to-count map backing this counter.
   *
   * @return the internal map itself (not a copy); changes made through it are visible to this counter
   */
  public Map<String, Integer> getCounters() {
    return this.counters;
  }

  /**
   * Records one more occurrence of {@code value}.
   *
   * @param value the term to count
   */
  public void add(String value) {
    counters.merge(value, 1, Integer::sum);
  }

  /**
   * Returns the counts to ship to a merging node, truncated to at most {@code limit} terms.
   *
   * <p>When {@code limit} is at least the number of distinct terms, the live internal map is
   * returned as-is. Otherwise a new map holding the {@code limit} highest-count terms is built;
   * terms with equal counts are ranked by term so that the selection does not depend on hash-map
   * iteration order.
   *
   * @param limit maximum number of terms to return
   * @return term-to-count map with at most {@code limit} entries
   * @throws IllegalArgumentException if {@code limit} is negative and this counter is non-empty
   *     (raised by {@link java.util.stream.Stream#limit(long)})
   */
  public Map<String, Integer> serialize(int limit) {
    if (limit >= counters.size()) {
      return counters;
    }

    return counters.entrySet()
        .stream()
        .sorted(TermFrequencyCounter::compareByCountDescending)
        .limit(limit)
        .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
  }

  /**
   * Adds every count of a serialized counter into this one.
   *
   * @param serialized term-to-count map, e.g. one produced by {@link #serialize(int)}; must not be null
   * @return this counter, to allow chaining
   */
  public TermFrequencyCounter merge(Map<String, Integer> serialized) {
    serialized.forEach((value, freq) -> counters.merge(value, freq, Integer::sum));

    return this;
  }

  /**
   * Orders entries by count, highest first, breaking ties by term (a null term sorts first).
   *
   * <p>Uses {@link Integer#compare(int, int)} instead of subtraction: subtraction overflows, and
   * therefore mis-orders entries, once a count has wrapped to a negative value.
   */
  private static int compareByCountDescending(
      Map.Entry<String, Integer> left, Map.Entry<String, Integer> right) {
    int byCount = Integer.compare(right.getValue(), left.getValue());
    if (byCount != 0) {
      return byCount;
    }

    String leftTerm = left.getKey();
    String rightTerm = right.getKey();
    if (leftTerm == null) {
      return rightTerm == null ? 0 : -1;
    }
    if (rightTerm == null) {
      return 1;
    }
    return leftTerm.compareTo(rightTerm);
  }
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
package org.apache.solr.search.facet;
2+
3+
import java.util.LinkedHashMap;
4+
import java.util.Map;
5+
6+
import org.apache.lucene.queries.function.ValueSource;
7+
import org.apache.solr.common.util.SimpleOrderedMap;
8+
import org.apache.solr.search.FunctionQParser;
9+
import org.apache.solr.search.SyntaxError;
10+
import org.apache.solr.search.ValueSourceParser;
11+
12+
public class TermFrequencyOfFrequenciesAgg extends SimpleAggValueSource {
13+
private final int termLimit;
14+
15+
public TermFrequencyOfFrequenciesAgg(ValueSource vs, int termLimit) {
16+
super("termfreqfreq", vs);
17+
18+
this.termLimit = termLimit;
19+
}
20+
21+
@Override
22+
public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) {
23+
return new TermFrequencySlotAcc(getArg(), fcontext, numSlots, termLimit);
24+
}
25+
26+
@Override
27+
public FacetMerger createFacetMerger(Object prototype) {
28+
return new Merger(termLimit);
29+
}
30+
31+
public static class Parser extends ValueSourceParser {
32+
@Override
33+
public ValueSource parse(FunctionQParser fp) throws SyntaxError {
34+
ValueSource vs = fp.parseValueSource();
35+
36+
int termLimit = Integer.MAX_VALUE;
37+
if (fp.hasMoreArguments()) {
38+
termLimit = fp.parseInt();
39+
}
40+
41+
return new TermFrequencyOfFrequenciesAgg(vs, termLimit);
42+
}
43+
}
44+
45+
private static class Merger extends FacetMerger {
46+
private final TermFrequencyCounter result;
47+
48+
public Merger(int termLimit) {
49+
this.result = new TermFrequencyCounter();
50+
}
51+
52+
@Override
53+
public void merge(Object facetResult, Context mcontext) {
54+
if (facetResult instanceof Map) {
55+
result.merge((Map<String, Integer>) facetResult);
56+
}
57+
}
58+
59+
@Override
60+
public void finish(Context mcontext) {
61+
// never called
62+
}
63+
64+
@Override
65+
public Object getMergedResult() {
66+
Map<Integer, Integer> map = new LinkedHashMap<>();
67+
68+
result.getCounters()
69+
.forEach((value, freq) -> map.merge(freq, 1, Integer::sum));
70+
71+
return map;
72+
}
73+
}
74+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
package org.apache.solr.search.facet;
2+
3+
import java.io.IOException;
4+
import java.util.Arrays;
5+
import java.util.Collections;
6+
import java.util.function.IntFunction;
7+
8+
import org.apache.lucene.queries.function.ValueSource;
9+
10+
public class TermFrequencySlotAcc extends FuncSlotAcc {
11+
private TermFrequencyCounter[] result;
12+
private final int termLimit;
13+
14+
public TermFrequencySlotAcc(ValueSource values, FacetContext fcontext, int numSlots, int termLimit) {
15+
super(values, fcontext, numSlots);
16+
17+
this.result = new TermFrequencyCounter[numSlots];
18+
this.termLimit = termLimit;
19+
}
20+
21+
@Override
22+
public void collect(int doc, int slot, IntFunction<SlotContext> slotContext) throws IOException {
23+
if (result[slot] == null) {
24+
result[slot] = new TermFrequencyCounter();
25+
}
26+
result[slot].add(values.strVal(doc));
27+
}
28+
29+
@Override
30+
public int compare(int slotA, int slotB) {
31+
throw new UnsupportedOperationException();
32+
}
33+
34+
@Override
35+
public Object getValue(int slotNum) {
36+
if (result[slotNum] != null) {
37+
return result[slotNum].serialize(termLimit);
38+
} else {
39+
return Collections.emptyList();
40+
}
41+
}
42+
43+
@Override
44+
public void reset() {
45+
Arrays.fill(result, null);
46+
}
47+
48+
@Override
49+
public void resize(Resizer resizer) {
50+
result = resizer.resize(result, null);
51+
}
52+
}

solr/core/src/java/org/apache/solr/update/processor/LogUpdateProcessorFactory.java

+3-4
Original file line numberDiff line numberDiff line change
@@ -40,9 +40,6 @@
4040
* the chain and prints them on finish(). At the Debug (FINE) level, a message
4141
* will be logged for each command prior to the next stage in the chain.
4242
* </p>
43-
* <p>
44-
* If the Log level is not &gt;= INFO the processor will not be created or added to the chain.
45-
* </p>
4643
*
4744
* @since solr 1.3
4845
*/
@@ -62,7 +59,7 @@ public void init( final NamedList args ) {
6259

6360
@Override
6461
public UpdateRequestProcessor getInstance(SolrQueryRequest req, SolrQueryResponse rsp, UpdateRequestProcessor next) {
65-
return log.isInfoEnabled() ? new LogUpdateProcessor(req, rsp, this, next) : null;
62+
return new LogUpdateProcessor(req, rsp, this, next);
6663
}
6764

6865
static class LogUpdateProcessor extends UpdateRequestProcessor {
@@ -185,6 +182,8 @@ public void finish() throws IOException {
185182

186183
if (log.isInfoEnabled()) {
187184
log.info(getLogStringAndClearRspToLog());
185+
} else {
186+
rsp.getToLog().clear();
188187
}
189188

190189
if (log.isWarnEnabled() && slowUpdateThresholdMillis >= 0) {
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
package org.apache.solr.search.facet;
2+
3+
import java.io.ByteArrayInputStream;
4+
import java.io.ByteArrayOutputStream;
5+
import java.io.IOException;
6+
import java.io.InputStream;
7+
import java.util.HashMap;
8+
import java.util.Map;
9+
import java.util.Random;
10+
11+
import com.carrotsearch.randomizedtesting.annotations.Seed;
12+
import org.apache.lucene.util.LuceneTestCase;
13+
import org.apache.solr.common.util.JavaBinCodec;
14+
import org.apache.solr.common.util.SimpleOrderedMap;
15+
import org.junit.Test;
16+
17+
public class TermFrequencyCounterTest extends LuceneTestCase {
18+
private static final char[] ALPHABET = "abcdefghijklkmnopqrstuvwxyz".toCharArray();
19+
20+
@Test
21+
public void testAddValue() throws IOException {
22+
int iters = 10 * RANDOM_MULTIPLIER;
23+
24+
for (int i = 0; i < iters; i++) {
25+
TermFrequencyCounter counter = new TermFrequencyCounter();
26+
27+
int numValues = random().nextInt(100);
28+
Map<String, Integer> expected = new HashMap<>();
29+
for (int j = 0; j < numValues; j++) {
30+
String value = randomString(ALPHABET, random().nextInt(256));
31+
int count = random().nextInt(256);
32+
33+
addCount(counter, value, count);
34+
35+
expected.merge(value, count, Integer::sum);
36+
}
37+
38+
expected.forEach((value, count) -> assertCount(counter, value, count));
39+
40+
TermFrequencyCounter serialized = serdeser(counter, random().nextInt(Integer.MAX_VALUE));
41+
42+
expected.forEach((value, count) -> assertCount(serialized, value, count));
43+
}
44+
}
45+
46+
@Test
47+
public void testMerge() throws IOException {
48+
int iters = 10 * RANDOM_MULTIPLIER;
49+
50+
for (int i = 0; i < iters; i++) {
51+
TermFrequencyCounter x = new TermFrequencyCounter();
52+
53+
int numXValues = random().nextInt(100);
54+
Map<String, Integer> expectedXValues = new HashMap<>();
55+
for (int j = 0; j < numXValues; j++) {
56+
String value = randomString(ALPHABET, random().nextInt(256));
57+
int count = random().nextInt(256);
58+
59+
addCount(x, value, count);
60+
61+
expectedXValues.merge(value, count, Integer::sum);
62+
}
63+
64+
expectedXValues.forEach((value, count) -> assertCount(x, value, count));
65+
66+
TermFrequencyCounter y = new TermFrequencyCounter();
67+
68+
int numYValues = random().nextInt(100);
69+
Map<String, Integer> expectedYValues = new HashMap<>();
70+
for (int j = 0; j < numYValues; j++) {
71+
String value = randomString(ALPHABET, random().nextInt(256));
72+
int count = random().nextInt(256);
73+
74+
addCount(y, value, count);
75+
76+
expectedYValues.merge(value, count, Integer::sum);
77+
}
78+
79+
expectedYValues.forEach((value, count) -> assertCount(y, value, count));
80+
81+
TermFrequencyCounter merged = merge(x, y, random().nextInt(Integer.MAX_VALUE));
82+
83+
expectedYValues.forEach((value, count) -> expectedXValues.merge(value, count, Integer::sum));
84+
85+
expectedXValues.forEach((value, count) -> assertCount(merged, value, count));
86+
}
87+
}
88+
89+
private static String randomString(char[] alphabet, int length) {
90+
final StringBuilder sb = new StringBuilder(length);
91+
for (int i = 0; i < length; i++) {
92+
sb.append(alphabet[random().nextInt(alphabet.length)]);
93+
}
94+
return sb.toString();
95+
}
96+
97+
private static void addCount(TermFrequencyCounter counter, String value, int count) {
98+
for (int i = 0; i < count; i++) {
99+
counter.add(value);
100+
}
101+
}
102+
103+
private static void assertCount(TermFrequencyCounter counter, String value, int count) {
104+
assertEquals(
105+
"value " + value + " should have count " + count,
106+
count,
107+
(int) counter.getCounters().getOrDefault(value, 0)
108+
);
109+
}
110+
111+
private static TermFrequencyCounter serdeser(TermFrequencyCounter counter, int limit) throws IOException {
112+
JavaBinCodec codec = new JavaBinCodec();
113+
114+
ByteArrayOutputStream out = new ByteArrayOutputStream();
115+
codec.marshal(counter.serialize(limit), out);
116+
117+
InputStream in = new ByteArrayInputStream(out.toByteArray());
118+
counter = new TermFrequencyCounter();
119+
counter.merge((Map<String, Integer>) codec.unmarshal(in));
120+
121+
return counter;
122+
}
123+
124+
private static TermFrequencyCounter merge(
125+
TermFrequencyCounter counter,
126+
TermFrequencyCounter toMerge,
127+
int limit
128+
) throws IOException {
129+
JavaBinCodec codec = new JavaBinCodec();
130+
131+
ByteArrayOutputStream out = new ByteArrayOutputStream();
132+
codec.marshal(toMerge.serialize(limit), out);
133+
134+
InputStream in = new ByteArrayInputStream(out.toByteArray());
135+
counter.merge((Map<String, Integer>) codec.unmarshal(in));
136+
137+
return counter;
138+
}
139+
}

0 commit comments

Comments
 (0)