Skip to content

Commit 4a8f13c

Browse files
committed
Prototype bitmap frequency aggs
1 parent bd643fe commit 4a8f13c

7 files changed

+438
-16
lines changed

solr/core/src/java/org/apache/solr/search/ValueSourceParser.java

+7-1
Original file line numberDiff line numberDiff line change
@@ -57,14 +57,16 @@
5757
import org.apache.solr.search.facet.AggValueSource;
5858
import org.apache.solr.search.facet.AvgAgg;
5959
import org.apache.solr.search.facet.BitmapCollectorAgg;
60+
import org.apache.solr.search.facet.BitmapFrequencyAgg;
6061
import org.apache.solr.search.facet.CountAgg;
62+
import org.apache.solr.search.facet.FrequencyOfFrequencyAgg;
6163
import org.apache.solr.search.facet.HLLAgg;
6264
import org.apache.solr.search.facet.MinMaxAgg;
6365
import org.apache.solr.search.facet.PercentileAgg;
66+
import org.apache.solr.search.facet.RelatednessAgg;
6467
import org.apache.solr.search.facet.StddevAgg;
6568
import org.apache.solr.search.facet.SumAgg;
6669
import org.apache.solr.search.facet.SumsqAgg;
67-
import org.apache.solr.search.facet.RelatednessAgg;
6870
import org.apache.solr.search.facet.TopDocsAgg;
6971
import org.apache.solr.search.facet.UniqueAgg;
7072
import org.apache.solr.search.facet.UniqueBlockAgg;
@@ -1059,6 +1061,10 @@ public ValueSource parse(FunctionQParser fp) throws SyntaxError {
10591061

10601062
addParser("agg_bitmapcollector", new BitmapCollectorAgg.Parser());
10611063

1064+
addParser("agg_bitmapfreq", new BitmapFrequencyAgg.Parser());
1065+
1066+
addParser("agg_bitmapfreqfreq", new FrequencyOfFrequencyAgg.Parser());
1067+
10621068
addParser("childfield", new ChildFieldValueSourceParser());
10631069
}
10641070

solr/core/src/java/org/apache/solr/search/facet/BitmapCollectorAgg.java

+2-15
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,5 @@
11
package org.apache.solr.search.facet;
22

3-
import java.io.ByteArrayOutputStream;
4-
import java.io.DataOutputStream;
53
import java.io.IOException;
64
import java.nio.ByteBuffer;
75
import java.util.Arrays;
@@ -73,7 +71,7 @@ public Object getValue(int slotNum) {
7371
byte[] serialised;
7472
if (result[slotNum] != null) {
7573
result[slotNum].runOptimize();
76-
serialised = bitmapToBytes(result[slotNum]);
74+
serialised = BitmapUtil.bitmapToBytes(result[slotNum]);
7775
} else {
7876
serialised = new byte[0];
7977
}
@@ -116,20 +114,9 @@ public void finish(Context mcontext) {
116114
public Object getMergedResult() {
117115
combined.runOptimize();
118116
SimpleOrderedMap map = new SimpleOrderedMap();
119-
map.add(KEY, bitmapToBytes(combined));
117+
map.add(KEY, BitmapUtil.bitmapToBytes(combined));
120118
return map;
121119
}
122120
}
123121

124-
private static byte[] bitmapToBytes(MutableRoaringBitmap bitmap) {
125-
ByteArrayOutputStream bos = new ByteArrayOutputStream();
126-
DataOutputStream dos = new DataOutputStream(bos);
127-
try {
128-
bitmap.serialize(dos);
129-
dos.close();
130-
return bos.toByteArray();
131-
} catch (IOException ioe) {
132-
throw new RuntimeException("Failed to serialise RoaringBitmap to bytes", ioe);
133-
}
134-
}
135122
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,159 @@
1+
package org.apache.solr.search.facet;
2+
3+
import java.util.ArrayList;
4+
import java.util.List;
5+
6+
import org.apache.solr.common.util.SimpleOrderedMap;
7+
import org.roaringbitmap.RoaringBitmap;
8+
9+
public class BitmapFrequencies {
10+
private final List<RoaringBitmap> frequencies;
11+
private final Integer maxFrequency;
12+
private RoaringBitmap overflow;
13+
14+
public BitmapFrequencies() {
15+
this.frequencies = new ArrayList<>();
16+
this.maxFrequency = null;
17+
}
18+
19+
public BitmapFrequencies(int maxFrequency) {
20+
this.frequencies = new ArrayList<>(maxFrequency);
21+
this.maxFrequency = maxFrequency;
22+
}
23+
24+
public BitmapFrequencies(SimpleOrderedMap<Object> serialized) {
25+
this();
26+
27+
Iterable<byte[]> serializedFrequencies = (Iterable<byte[]>) serialized.get("frequencies");
28+
if (serializedFrequencies != null) {
29+
for (byte[] bytes : serializedFrequencies) {
30+
this.frequencies.add(BitmapUtil.bytesToBitmap(bytes));
31+
}
32+
}
33+
34+
byte[] overflow = (byte[]) serialized.get("overflow");
35+
if (overflow != null) {
36+
this.overflow = BitmapUtil.bytesToBitmap(overflow);
37+
}
38+
}
39+
40+
public List<RoaringBitmap> getFrequencies() {
41+
return this.frequencies;
42+
}
43+
44+
public RoaringBitmap getOverflow() {
45+
return this.overflow;
46+
}
47+
48+
public void add(int value) {
49+
for (RoaringBitmap frequency : frequencies) {
50+
if (!frequency.contains(value)) {
51+
frequency.add(value);
52+
return;
53+
}
54+
frequency.remove(value);
55+
}
56+
57+
if (maxFrequency == null || frequencies.size() < maxFrequency) {
58+
frequencies.add(RoaringBitmap.bitmapOf(value));
59+
} else {
60+
if (overflow == null) {
61+
overflow = RoaringBitmap.bitmapOf(value);
62+
} else {
63+
overflow.add(value);
64+
}
65+
}
66+
}
67+
68+
public SimpleOrderedMap<Object> serialize() {
69+
SimpleOrderedMap<Object> map = new SimpleOrderedMap<>();
70+
71+
if (!frequencies.isEmpty()) {
72+
List<byte[]> serialized = new ArrayList<>(frequencies.size());
73+
for (RoaringBitmap bitmap : frequencies) {
74+
bitmap.runOptimize();
75+
serialized.add(BitmapUtil.bitmapToBytes(bitmap));
76+
}
77+
map.add("frequencies", serialized);
78+
}
79+
80+
if (overflow != null) {
81+
map.add("overflow", BitmapUtil.bitmapToBytes(overflow));
82+
}
83+
84+
return map;
85+
}
86+
87+
// Merges (in-place) with frequencies from another sample. The supplied BitmapFrequencies is no longer valid after
88+
// this operation.
89+
public void merge(BitmapFrequencies other) {
90+
int smallest = Math.min(frequencies.size(), other.frequencies.size());
91+
92+
RoaringBitmap carried = new RoaringBitmap();
93+
int f = 0;
94+
while (f < smallest) {
95+
// x(f) is the set of values which occurred with frequency f in this sample
96+
// y(f) is the set of values which occurred with frequency f in the sample to be merged
97+
// carried is the intersection of x(f-1) and y(f-1)
98+
//
99+
// 1) x(f) and y(f) may intersect
100+
// 2) x(f) does not intersect with x(f-1)
101+
// 3) y(f) does not intersect with y(f-1)
102+
// 4) For carried to intersect with x(f), at least one value would have to be in x(f-1), y(f-1) and x(f).
103+
// As per 2), this is impossible.
104+
// 5) For carried to intersect with y(f), at least one value would have to be in x(f-1), y(f-1) and y(f).
105+
// As per 3), this is impossible.
106+
// 6) Therefore, carried does not intersect with either x(f) or y(f).
107+
RoaringBitmap x = frequencies.get(f);
108+
RoaringBitmap y = other.frequencies.get(f);
109+
110+
// We first merge carried, x, and y.
111+
// Since x and y may intersect, the result may contain some values with frequency at most f+1.
112+
RoaringBitmap merged = carried;
113+
merged.or(x);
114+
merged.or(y);
115+
116+
// We now calculate the values in the merged set which have frequency f+1, and remove them (to be carried).
117+
carried = x;
118+
carried.and(y);
119+
merged.andNot(carried);
120+
121+
frequencies.set(f, merged);
122+
f++;
123+
}
124+
125+
while (f < other.frequencies.size()) {
126+
RoaringBitmap merged = other.frequencies.get(f);
127+
128+
if (carried != null) {
129+
merged.or(carried);
130+
carried = null;
131+
}
132+
133+
frequencies.add(merged);
134+
f++;
135+
}
136+
137+
if (maxFrequency == null || frequencies.size() < maxFrequency) {
138+
if (carried != null) {
139+
frequencies.add(carried);
140+
}
141+
} else {
142+
if (other.overflow != null) {
143+
if (overflow == null) {
144+
overflow = other.overflow;
145+
} else {
146+
overflow.or(other.overflow);
147+
}
148+
}
149+
150+
if (carried != null) {
151+
if (overflow == null) {
152+
overflow = carried;
153+
} else {
154+
overflow.or(carried);
155+
}
156+
}
157+
}
158+
}
159+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
package org.apache.solr.search.facet;
2+
3+
import org.apache.lucene.queries.function.ValueSource;
4+
import org.apache.solr.common.util.SimpleOrderedMap;
5+
import org.apache.solr.search.FunctionQParser;
6+
import org.apache.solr.search.SyntaxError;
7+
import org.apache.solr.search.ValueSourceParser;
8+
9+
// Calculates the frequency of ordinal values, up to an optional maximum frequency
10+
//
11+
// The response is a map with the following fields:
12+
// - frequencies: an array where frequencies[i] is a Roaring Bitmap of the ordinal values with frequency i (omitted if
13+
// empty)
14+
// - overflow: a Roaring Bitmap of ordinal values with more than the maximum frequency (omitted if empty)
15+
//
16+
// Lacking a coherent definition of magnitude other than the raw count, this aggregate cannot be used for sorting.
17+
public class BitmapFrequencyAgg extends SimpleAggValueSource {
18+
private final Integer maxFrequency;
19+
20+
public BitmapFrequencyAgg(ValueSource vs, Integer maxFrequency) {
21+
super("bitmapfreq", vs);
22+
23+
this.maxFrequency = maxFrequency;
24+
}
25+
26+
@Override
27+
public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) {
28+
return new BitmapFrequencySlotAcc(getArg(), fcontext, numSlots, maxFrequency);
29+
}
30+
31+
@Override
32+
public FacetMerger createFacetMerger(Object prototype) {
33+
if (maxFrequency == null) {
34+
return new BitmapFrequencyFacetMerger();
35+
} else {
36+
return new BitmapFrequencyFacetMerger(maxFrequency);
37+
}
38+
}
39+
40+
public static class Parser extends ValueSourceParser {
41+
@Override
42+
public ValueSource parse(FunctionQParser fp) throws SyntaxError {
43+
ValueSource valueSource = fp.parseValueSource();
44+
45+
Integer maxFrequency = null;
46+
if (fp.hasMoreArguments()) {
47+
maxFrequency = fp.parseInt();
48+
}
49+
50+
return new BitmapFrequencyAgg(valueSource, maxFrequency);
51+
}
52+
}
53+
54+
private static class BitmapFrequencyFacetMerger extends FacetMerger {
55+
private final BitmapFrequencies result;
56+
57+
public BitmapFrequencyFacetMerger() {
58+
this.result = new BitmapFrequencies();
59+
}
60+
61+
public BitmapFrequencyFacetMerger(int maxFrequency) {
62+
this.result = new BitmapFrequencies(maxFrequency);
63+
}
64+
65+
@Override
66+
public void merge(Object facetResult, Context mcontext) {
67+
if (facetResult instanceof SimpleOrderedMap) {
68+
BitmapFrequencies deserialized = new BitmapFrequencies((SimpleOrderedMap<Object>) facetResult);
69+
70+
result.merge(deserialized);
71+
}
72+
}
73+
74+
@Override
75+
public void finish(Context mcontext) {
76+
// never called
77+
}
78+
79+
@Override
80+
public Object getMergedResult() {
81+
return result.serialize();
82+
}
83+
}
84+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
package org.apache.solr.search.facet;
2+
3+
import java.io.IOException;
4+
import java.util.Arrays;
5+
import java.util.Collections;
6+
import java.util.function.IntFunction;
7+
8+
import org.apache.lucene.queries.function.ValueSource;
9+
10+
public class BitmapFrequencySlotAcc extends FuncSlotAcc {
11+
private BitmapFrequencies[] result;
12+
private final Integer maxFrequency;
13+
14+
public BitmapFrequencySlotAcc(ValueSource values, FacetContext fcontext, int numSlots, Integer maxFrequency) {
15+
super(values, fcontext, numSlots);
16+
17+
this.result = new BitmapFrequencies[numSlots];
18+
this.maxFrequency = maxFrequency;
19+
}
20+
21+
@Override
22+
public void collect(int doc, int slot, IntFunction<SlotContext> slotContext) throws IOException {
23+
if (result[slot] == null) {
24+
if (this.maxFrequency != null) {
25+
result[slot] = new BitmapFrequencies(this.maxFrequency);
26+
} else {
27+
result[slot] = new BitmapFrequencies();
28+
}
29+
}
30+
result[slot].add(values.intVal(doc));
31+
}
32+
33+
@Override
34+
public int compare(int slotA, int slotB) {
35+
throw new UnsupportedOperationException();
36+
}
37+
38+
@Override
39+
public Object getValue(int slotNum) {
40+
if (result[slotNum] != null) {
41+
return result[slotNum].serialize();
42+
} else {
43+
return Collections.emptyList();
44+
}
45+
}
46+
47+
@Override
48+
public void reset() {
49+
Arrays.fill(result, null);
50+
}
51+
52+
@Override
53+
public void resize(Resizer resizer) {
54+
result = resizer.resize(result, null);
55+
}
56+
}

0 commit comments

Comments
 (0)