Skip to content

Commit d9f9739

Browse files
committed
Prototype bitmap frequency aggs
1 parent bd643fe commit d9f9739

7 files changed

+478
-16
lines changed

solr/core/src/java/org/apache/solr/search/ValueSourceParser.java

+7-1
Original file line number | Diff line number | Diff line change
@@ -57,14 +57,16 @@
5757
import org.apache.solr.search.facet.AggValueSource;
5858
import org.apache.solr.search.facet.AvgAgg;
5959
import org.apache.solr.search.facet.BitmapCollectorAgg;
60+
import org.apache.solr.search.facet.BitmapFrequencyAgg;
6061
import org.apache.solr.search.facet.CountAgg;
62+
import org.apache.solr.search.facet.FrequencyOfFrequenciesAgg;
6163
import org.apache.solr.search.facet.HLLAgg;
6264
import org.apache.solr.search.facet.MinMaxAgg;
6365
import org.apache.solr.search.facet.PercentileAgg;
66+
import org.apache.solr.search.facet.RelatednessAgg;
6467
import org.apache.solr.search.facet.StddevAgg;
6568
import org.apache.solr.search.facet.SumAgg;
6669
import org.apache.solr.search.facet.SumsqAgg;
67-
import org.apache.solr.search.facet.RelatednessAgg;
6870
import org.apache.solr.search.facet.TopDocsAgg;
6971
import org.apache.solr.search.facet.UniqueAgg;
7072
import org.apache.solr.search.facet.UniqueBlockAgg;
@@ -1059,6 +1061,10 @@ public ValueSource parse(FunctionQParser fp) throws SyntaxError {
10591061

10601062
addParser("agg_bitmapcollector", new BitmapCollectorAgg.Parser());
10611063

1064+
addParser("agg_bitmapfreq", new BitmapFrequencyAgg.Parser());
1065+
1066+
addParser("agg_bitmapfreqfreq", new FrequencyOfFrequenciesAgg.Parser());
1067+
10621068
addParser("childfield", new ChildFieldValueSourceParser());
10631069
}
10641070

solr/core/src/java/org/apache/solr/search/facet/BitmapCollectorAgg.java

+2-15
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,5 @@
11
package org.apache.solr.search.facet;
22

3-
import java.io.ByteArrayOutputStream;
4-
import java.io.DataOutputStream;
53
import java.io.IOException;
64
import java.nio.ByteBuffer;
75
import java.util.Arrays;
@@ -73,7 +71,7 @@ public Object getValue(int slotNum) {
7371
byte[] serialised;
7472
if (result[slotNum] != null) {
7573
result[slotNum].runOptimize();
76-
serialised = bitmapToBytes(result[slotNum]);
74+
serialised = BitmapUtil.bitmapToBytes(result[slotNum]);
7775
} else {
7876
serialised = new byte[0];
7977
}
@@ -116,20 +114,9 @@ public void finish(Context mcontext) {
116114
public Object getMergedResult() {
117115
combined.runOptimize();
118116
SimpleOrderedMap map = new SimpleOrderedMap();
119-
map.add(KEY, bitmapToBytes(combined));
117+
map.add(KEY, BitmapUtil.bitmapToBytes(combined));
120118
return map;
121119
}
122120
}
123121

124-
private static byte[] bitmapToBytes(MutableRoaringBitmap bitmap) {
125-
ByteArrayOutputStream bos = new ByteArrayOutputStream();
126-
DataOutputStream dos = new DataOutputStream(bos);
127-
try {
128-
bitmap.serialize(dos);
129-
dos.close();
130-
return bos.toByteArray();
131-
} catch (IOException ioe) {
132-
throw new RuntimeException("Failed to serialise RoaringBitmap to bytes", ioe);
133-
}
134-
}
135122
}
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,81 @@
1+
package org.apache.solr.search.facet;
2+
3+
import org.apache.lucene.queries.function.ValueSource;
4+
import org.apache.solr.common.util.SimpleOrderedMap;
5+
import org.apache.solr.search.FunctionQParser;
6+
import org.apache.solr.search.SyntaxError;
7+
import org.apache.solr.search.ValueSourceParser;
8+
9+
/**
10+
* Calculates the frequency of ordinal values using Roaring Bitmaps.
11+
*
12+
* The response is a map with the following fields:
13+
* - bitmaps: an array of bitmaps, where the frequency of a value x is given by the sum of {@code 2^i} for all values
14+
* of {@code i} where {@code bitmaps[i].contains(x)}
15+
* - overflow: a bitmap of ordinal values with {@code frequency >= 2^(bitmaps.length)}
16+
*
17+
* Lacking a coherent definition of magnitude other than the raw count, this aggregate cannot be used for sorting.
18+
*/
19+
public class BitmapFrequencyAgg extends SimpleAggValueSource {
20+
private final int size;
21+
22+
public BitmapFrequencyAgg(ValueSource vs, int size) {
23+
super("bitmapfreq", vs);
24+
25+
this.size = size;
26+
}
27+
28+
@Override
29+
public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) {
30+
return new BitmapFrequencySlotAcc(getArg(), fcontext, numSlots, size);
31+
}
32+
33+
@Override
34+
public FacetMerger createFacetMerger(Object prototype) {
35+
return new Merger(size);
36+
}
37+
38+
public static class Parser extends ValueSourceParser {
39+
@Override
40+
public ValueSource parse(FunctionQParser fp) throws SyntaxError {
41+
ValueSource valueSource = fp.parseValueSource();
42+
43+
int size = 16;
44+
if (fp.hasMoreArguments()) {
45+
size = fp.parseInt();
46+
}
47+
48+
return new BitmapFrequencyAgg(valueSource, size);
49+
}
50+
}
51+
52+
private static class Merger extends FacetMerger {
53+
private final int size;
54+
private BitmapFrequencyCounter result;
55+
56+
public Merger(int size) {
57+
this.size = size;
58+
this.result = new BitmapFrequencyCounter(size);
59+
}
60+
61+
@Override
62+
public void merge(Object facetResult, Context mcontext) {
63+
if (facetResult instanceof SimpleOrderedMap) {
64+
BitmapFrequencyCounter deserialized = new BitmapFrequencyCounter(size);
65+
deserialized.deserialize((SimpleOrderedMap<Object>) facetResult);
66+
67+
result = result.merge(deserialized);
68+
}
69+
}
70+
71+
@Override
72+
public void finish(Context mcontext) {
73+
// never called
74+
}
75+
76+
@Override
77+
public Object getMergedResult() {
78+
return result.serialize();
79+
}
80+
}
81+
}
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,226 @@
1+
package org.apache.solr.search.facet;
2+
3+
import org.apache.solr.common.util.SimpleOrderedMap;
4+
import org.roaringbitmap.RoaringBitmap;
5+
6+
/**
7+
* Counts frequencies of ordinal values using Roaring Bitmaps.
8+
*/
9+
public class BitmapFrequencyCounter {
10+
private final RoaringBitmap[] bitmaps;
11+
private RoaringBitmap overflow;
12+
13+
/**
14+
* Constructs a new frequency counter. The maximum countable frequency will be given by {@code (2^size)-1}.
15+
*
16+
* @param size The maximum size of the frequencies list
17+
*/
18+
public BitmapFrequencyCounter(int size) {
19+
this.bitmaps = new RoaringBitmap[size];
20+
}
21+
22+
/**
23+
* An array of bitmaps encoding frequencies of values: the frequency of a value x is given by the sum of {@code 2^i}
24+
* for all values of {@code i} where {@code bitmaps[i].contains(x)}.
25+
*
26+
* @return The encoded frequencies
27+
*/
28+
public RoaringBitmap[] getBitmaps() {
29+
return this.bitmaps;
30+
}
31+
32+
/**
33+
* The overflow set of all values with {@code frequency >= 2^(bitmaps.length)}.
34+
*
35+
* @return The overflow set
36+
*/
37+
public RoaringBitmap getOverflow() {
38+
return this.overflow;
39+
}
40+
41+
/**
42+
* Adds one occurrence of the given value to the counter.
43+
*
44+
* @param value The value to add
45+
*/
46+
public void add(int value) {
47+
// This is just binary addition x+1=y - we carry the value till we find an empty column
48+
for (int i = 0; i < bitmaps.length; i++) {
49+
RoaringBitmap bitmap = bitmaps[i];
50+
if (bitmap == null) {
51+
bitmap = bitmaps[i] = new RoaringBitmap();
52+
}
53+
54+
if (!bitmap.contains(value)) {
55+
bitmap.add(value);
56+
return;
57+
}
58+
59+
bitmap.remove(value);
60+
}
61+
62+
// If we reach this point, the frequency of this value is >= 2^(bitmaps.length)
63+
64+
if (overflow == null) {
65+
overflow = new RoaringBitmap();
66+
}
67+
68+
overflow.add(value);
69+
}
70+
71+
/**
72+
* Serializes the counter.
73+
*
74+
* @return The serialized data
75+
*/
76+
public SimpleOrderedMap<Object> serialize() {
77+
SimpleOrderedMap<Object> serialized = new SimpleOrderedMap<>();
78+
79+
byte[][] serializedBitmaps = new byte[bitmaps.length][];
80+
81+
int i = 0;
82+
while (i < bitmaps.length) {
83+
RoaringBitmap bitmap = bitmaps[i];
84+
if (bitmap == null) {
85+
break;
86+
}
87+
88+
bitmap.runOptimize();
89+
serializedBitmaps[i] = BitmapUtil.bitmapToBytes(bitmap);
90+
91+
i++;
92+
}
93+
94+
if (i > 0) {
95+
serialized.add("bitmaps", serializedBitmaps);
96+
}
97+
98+
if (overflow != null) {
99+
overflow.runOptimize();
100+
serialized.add("overflow", BitmapUtil.bitmapToBytes(overflow));
101+
}
102+
103+
return serialized;
104+
}
105+
106+
/**
107+
* Populates the counter from the given serialized data.
108+
*
109+
* The counter must be fresh (with no values previously added), and have the same size as the counter from which the
110+
* serialized data was generated.
111+
*
112+
* @param serialized The serialized data
113+
*/
114+
public void deserialize(SimpleOrderedMap<Object> serialized) {
115+
byte[][] serializedBitmaps = (byte[][]) serialized.get("bitmaps");
116+
if (serializedBitmaps != null) {
117+
for (int i = 0; i < bitmaps.length; i++) {
118+
bitmaps[i] = BitmapUtil.bytesToBitmap(serializedBitmaps[i]);
119+
}
120+
}
121+
122+
byte[] overflow = (byte[]) serialized.get("overflow");
123+
if (overflow != null) {
124+
this.overflow = BitmapUtil.bytesToBitmap(overflow);
125+
} else {
126+
this.overflow = null;
127+
}
128+
}
129+
130+
/**
131+
* Merges this counter with another (in-place).
132+
*
133+
* The other counter must have the same size as this counter. After this operation, the returned counter will contain
134+
* the values from both counters with their frequencies added together, and references to either of the original
135+
* counters should be discarded (since either may now be invalid, and one will have been modified and returned).
136+
*
137+
* @param other The counter to merge in
138+
* @return The merged counter
139+
*/
140+
public BitmapFrequencyCounter merge(BitmapFrequencyCounter other) {
141+
// The algorithm here is a ripple-carry adder in two dimensions, built from half-adders that are adapted from the
142+
// standard (where s is the sum, and c the carried value):
143+
//
144+
// s = x xor y
145+
// c = x and y
146+
//
147+
// to:
148+
//
149+
// s = x xor y
150+
// c = y andnot s
151+
//
152+
// which allows in-place modification of bitmaps (x modified into s, y modified into c).
153+
154+
RoaringBitmap c;
155+
156+
int i = 0;
157+
158+
RoaringBitmap x = bitmaps[i];
159+
RoaringBitmap y = other.bitmaps[i];
160+
if (x == null) {
161+
return other;
162+
} else if (y == null) {
163+
return this;
164+
}
165+
166+
x.xor(y); // x2 = x1 xor y1
167+
y.andNot(x); // y2 = y1 andnot x2
168+
169+
c = y; // c1 = y2
170+
171+
i++;
172+
173+
while (i < bitmaps.length) {
174+
x = bitmaps[i];
175+
y = other.bitmaps[i];
176+
if (x == null || y == null) {
177+
break;
178+
}
179+
180+
x.xor(y); // x2 = x1 xor y1
181+
y.andNot(x); // y2 = y1 andnot x2
182+
x.xor(c); // x3 = x2 xor c1
183+
184+
c.andNot(x); // c2 = c1 andnot x3
185+
c.or(y); // c3 = c2 or y2
186+
187+
i++;
188+
}
189+
190+
while (i < bitmaps.length) {
191+
x = bitmaps[i];
192+
if (x == null) {
193+
break;
194+
}
195+
196+
x.xor(c); // x2 = x1 xor c1
197+
c.andNot(x); // c2 = c1 andnot x2
198+
199+
i++;
200+
}
201+
202+
while (i < bitmaps.length) {
203+
x = other.bitmaps[i];
204+
if (x == null) {
205+
break;
206+
}
207+
208+
x.xor(c); // x2 = x1 xor c1
209+
c.andNot(x); // c2 = c1 andnot x2
210+
211+
bitmaps[i] = x;
212+
213+
i++;
214+
}
215+
216+
if (i == bitmaps.length) {
217+
if (overflow == null) {
218+
overflow = c;
219+
} else {
220+
overflow.or(c);
221+
}
222+
}
223+
224+
return this;
225+
}
226+
}

0 commit comments

Comments
 (0)