|
| 1 | +package org.apache.solr.search.facet; |
| 2 | + |
| 3 | +import java.util.ArrayList; |
| 4 | +import java.util.List; |
| 5 | + |
| 6 | +import org.apache.solr.common.util.SimpleOrderedMap; |
| 7 | +import org.roaringbitmap.RoaringBitmap; |
| 8 | + |
| 9 | +public class BitmapFrequencies { |
| 10 | + private final List<RoaringBitmap> frequencies; |
| 11 | + private final Integer maxFrequency; |
| 12 | + private RoaringBitmap overflow; |
| 13 | + |
| 14 | + public BitmapFrequencies() { |
| 15 | + this.frequencies = new ArrayList<>(); |
| 16 | + this.maxFrequency = null; |
| 17 | + } |
| 18 | + |
| 19 | + public BitmapFrequencies(int maxFrequency) { |
| 20 | + this.frequencies = new ArrayList<>(maxFrequency); |
| 21 | + this.maxFrequency = maxFrequency; |
| 22 | + } |
| 23 | + |
| 24 | + public BitmapFrequencies(SimpleOrderedMap<Object> serialized) { |
| 25 | + this(); |
| 26 | + |
| 27 | + Iterable<byte[]> serializedFrequencies = (Iterable<byte[]>) serialized.get("frequencies"); |
| 28 | + if (serializedFrequencies != null) { |
| 29 | + for (byte[] bytes : serializedFrequencies) { |
| 30 | + this.frequencies.add(BitmapUtil.bytesToBitmap(bytes)); |
| 31 | + } |
| 32 | + } |
| 33 | + |
| 34 | + byte[] overflow = (byte[]) serialized.get("overflow"); |
| 35 | + if (overflow != null) { |
| 36 | + this.overflow = BitmapUtil.bytesToBitmap(overflow); |
| 37 | + } |
| 38 | + } |
| 39 | + |
| 40 | + public List<RoaringBitmap> getFrequencies() { |
| 41 | + return this.frequencies; |
| 42 | + } |
| 43 | + |
| 44 | + public RoaringBitmap getOverflow() { |
| 45 | + return this.overflow; |
| 46 | + } |
| 47 | + |
| 48 | + public void add(int value) { |
| 49 | + for (RoaringBitmap frequency : frequencies) { |
| 50 | + if (!frequency.contains(value)) { |
| 51 | + frequency.add(value); |
| 52 | + return; |
| 53 | + } |
| 54 | + frequency.remove(value); |
| 55 | + } |
| 56 | + |
| 57 | + if (maxFrequency == null || frequencies.size() < maxFrequency) { |
| 58 | + frequencies.add(RoaringBitmap.bitmapOf(value)); |
| 59 | + } else { |
| 60 | + if (overflow == null) { |
| 61 | + overflow = RoaringBitmap.bitmapOf(value); |
| 62 | + } else { |
| 63 | + overflow.add(value); |
| 64 | + } |
| 65 | + } |
| 66 | + } |
| 67 | + |
| 68 | + public SimpleOrderedMap<Object> serialize() { |
| 69 | + SimpleOrderedMap<Object> map = new SimpleOrderedMap<>(); |
| 70 | + |
| 71 | + if (!frequencies.isEmpty()) { |
| 72 | + List<byte[]> serialized = new ArrayList<>(frequencies.size()); |
| 73 | + for (RoaringBitmap bitmap : frequencies) { |
| 74 | + bitmap.runOptimize(); |
| 75 | + serialized.add(BitmapUtil.bitmapToBytes(bitmap)); |
| 76 | + } |
| 77 | + map.add("frequencies", serialized); |
| 78 | + } |
| 79 | + |
| 80 | + if (overflow != null) { |
| 81 | + map.add("overflow", BitmapUtil.bitmapToBytes(overflow)); |
| 82 | + } |
| 83 | + |
| 84 | + return map; |
| 85 | + } |
| 86 | + |
| 87 | + // Merges (in-place) with frequencies from another sample. The supplied BitmapFrequencies is no longer valid after |
| 88 | + // this operation. |
| 89 | + public void merge(BitmapFrequencies other) { |
| 90 | + int smallest = Math.min(frequencies.size(), other.frequencies.size()); |
| 91 | + |
| 92 | + RoaringBitmap carried = new RoaringBitmap(); |
| 93 | + int f = 0; |
| 94 | + while (f < smallest) { |
| 95 | + // x(f) is the set of values which occurred with frequency f in this sample |
| 96 | + // y(f) is the set of values which occurred with frequency f in the sample to be merged |
| 97 | + // carried is the intersection of x(f-1) and y(f-1) |
| 98 | + // |
| 99 | + // 1) x(f) and y(f) may intersect |
| 100 | + // 2) x(f) does not intersect with x(f-1) |
| 101 | + // 3) y(f) does not intersect with y(f-1) |
| 102 | + // 4) For carried to intersect with x(f), at least one value would have to be in x(f-1), y(f-1) and x(f). |
| 103 | + // As per 2), this is impossible. |
| 104 | + // 5) For carried to intersect with y(f), at least one value would have to be in x(f-1), y(f-1) and y(f). |
| 105 | + // As per 3), this is impossible. |
| 106 | + // 6) Therefore, carried does not intersect with either x(f) or y(f). |
| 107 | + RoaringBitmap x = frequencies.get(f); |
| 108 | + RoaringBitmap y = other.frequencies.get(f); |
| 109 | + |
| 110 | + // We first merge carried, x, and y. |
| 111 | + // Since x and y may intersect, the result may contain some values with frequency at most f+1. |
| 112 | + RoaringBitmap merged = carried; |
| 113 | + merged.or(x); |
| 114 | + merged.or(y); |
| 115 | + |
| 116 | + // We now calculate the values in the merged set which have frequency f+1, and remove them (to be carried). |
| 117 | + carried = x; |
| 118 | + carried.and(y); |
| 119 | + merged.andNot(carried); |
| 120 | + |
| 121 | + frequencies.set(f, merged); |
| 122 | + f++; |
| 123 | + } |
| 124 | + |
| 125 | + while (f < other.frequencies.size()) { |
| 126 | + RoaringBitmap merged = other.frequencies.get(f); |
| 127 | + |
| 128 | + if (carried != null) { |
| 129 | + merged.or(carried); |
| 130 | + carried = null; |
| 131 | + } |
| 132 | + |
| 133 | + frequencies.add(merged); |
| 134 | + f++; |
| 135 | + } |
| 136 | + |
| 137 | + if (maxFrequency == null || frequencies.size() < maxFrequency) { |
| 138 | + if (carried != null) { |
| 139 | + frequencies.add(carried); |
| 140 | + } |
| 141 | + } else { |
| 142 | + if (other.overflow != null) { |
| 143 | + if (overflow == null) { |
| 144 | + overflow = other.overflow; |
| 145 | + } else { |
| 146 | + overflow.or(other.overflow); |
| 147 | + } |
| 148 | + } |
| 149 | + |
| 150 | + if (carried != null) { |
| 151 | + if (overflow == null) { |
| 152 | + overflow = carried; |
| 153 | + } else { |
| 154 | + overflow.or(carried); |
| 155 | + } |
| 156 | + } |
| 157 | + } |
| 158 | + } |
| 159 | +} |
0 commit comments