Skip to content

Commit c99a062

Browse files
authored
Merge pull request #417 from RoaringBitmap/faster-faster-iandnot
Faster faster iandnot
2 parents a550de6 + 34fc2bf commit c99a062

4 files changed

+167
-4
lines changed

arraycontainer.go

+48-4
Original file line numberDiff line numberDiff line change
@@ -664,10 +664,54 @@ func (ac *arrayContainer) iandNot(a container) container {
664664
}
665665

666666
func (ac *arrayContainer) iandNotRun16(rc *runContainer16) container {
667-
rcb := rc.toBitmapContainer()
668-
acb := ac.toBitmapContainer()
669-
acb.iandNotBitmapSurely(rcb)
670-
*ac = *(acb.toArrayContainer())
667+
// Fast path: if either the array container or the run container is empty, the result is the array.
668+
if ac.isEmpty() || rc.isEmpty() {
669+
// Empty
670+
return ac
671+
}
672+
// Fast path: if the run container is full, the result is empty.
673+
if rc.isFull() {
674+
ac.content = ac.content[:0]
675+
return ac
676+
}
677+
current_run := 0
678+
// All values in [start_run, end_end] are part of the run
679+
start_run := rc.iv[current_run].start
680+
end_end := start_run + rc.iv[current_run].length
681+
// We are going to read values in the array at index i, and we are
682+
// going to write them at index pos. So we do in-place processing.
683+
// We always have that pos <= i by construction. So we can either
684+
// overwrite a value just read, or a value that was previous read.
685+
pos := 0
686+
i := 0
687+
for ; i < len(ac.content); i++ {
688+
if ac.content[i] < start_run {
689+
// the value in the array appears before the run [start_run, end_end]
690+
ac.content[pos] = ac.content[i]
691+
pos++
692+
} else if ac.content[i] <= end_end {
693+
// nothing to do, the value is in the array but also in the run.
694+
} else {
695+
// We have the value in the array after the run. We cannot tell
696+
// whether we need to keep it or not. So let us move to another run.
697+
if current_run+1 < len(rc.iv) {
698+
current_run++
699+
start_run = rc.iv[current_run].start
700+
end_end = start_run + rc.iv[current_run].length
701+
i-- // retry with the same i
702+
} else {
703+
// We have exhausted the number of runs. We can keep the rest of the values
704+
// from i to len(ac.content) - 1 inclusively.
705+
break // We are done, the rest of the array will be kept
706+
}
707+
}
708+
}
709+
for ; i < len(ac.content); i++ {
710+
ac.content[pos] = ac.content[i]
711+
pos++
712+
}
713+
// We 'shink' the slice.
714+
ac.content = ac.content[:pos]
671715
return ac
672716
}
673717

arraycontainer_test.go

+22
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77
"testing"
88

99
"github.com/stretchr/testify/assert"
10+
"github.com/stretchr/testify/require"
1011
)
1112

1213
func TestArrayContainerTransition(t *testing.T) {
@@ -332,6 +333,27 @@ func TestArrayContainerEtc070(t *testing.T) {
332333
assert.Equal(t, 1, ac10.numberOfRuns())
333334
}
334335

336+
func TestArrayContainerIAndNot(t *testing.T) {
337+
var ac container
338+
ac = newArrayContainer()
339+
ac.iadd(12)
340+
ac.iadd(27)
341+
ac.iadd(32)
342+
ac.iadd(88)
343+
ac.iadd(188)
344+
ac.iadd(289)
345+
346+
var rc container
347+
rc = newRunContainer16Range(0, 15)
348+
rc = rc.iaddRange(1500, 2000)
349+
rc = rc.iaddRange(55, 100)
350+
rc = rc.iaddRange(25, 50)
351+
ac = ac.iandNot(rc)
352+
353+
require.ElementsMatch(t, []uint16{188, 289}, ac.(*arrayContainer).content)
354+
require.Equal(t, 2, ac.getCardinality())
355+
}
356+
335357
func TestArrayContainerIand(t *testing.T) {
336358
a := NewBitmap()
337359
a.AddRange(0, 200000)

benchmark_test.go

+96
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package roaring
33
import (
44
"bytes"
55
"fmt"
6+
"math"
67
"math/rand"
78
"testing"
89

@@ -1132,3 +1133,98 @@ func BenchmarkAndAny(b *testing.B) {
11321133
runSet("small-filters", genOne(r, largeSize, domain), genMulti(r, filtersNum, smallSize, domain))
11331134
runSet("equal", genOne(r, defaultSize, domain), genMulti(r, filtersNum, defaultSize, domain))
11341135
}
1136+
1137+
func BenchmarkAndNot(b *testing.B) {
1138+
type generator struct {
1139+
name string
1140+
f func() *Bitmap
1141+
}
1142+
makeRunContainer := generator{
1143+
name: "run",
1144+
f: func() *Bitmap {
1145+
rb := NewBitmap()
1146+
for i := 0; i < 100; i++ {
1147+
start := rand.Intn(math.MaxUint16)
1148+
limit := start + rand.Intn(math.MaxUint16-start)
1149+
rb.AddRange(uint64(start), uint64(limit))
1150+
}
1151+
rb.RunOptimize()
1152+
return rb
1153+
},
1154+
}
1155+
1156+
makeArrayContainer := generator{
1157+
name: "array",
1158+
f: func() *Bitmap {
1159+
rb := NewBitmap()
1160+
for i := 0; i < arrayDefaultMaxSize/2; i++ {
1161+
rb.Add(uint32(rand.Intn(math.MaxUint16)))
1162+
}
1163+
return rb
1164+
},
1165+
}
1166+
makeBitmapContainer := generator{
1167+
name: "bitmap",
1168+
f: func() *Bitmap {
1169+
buf := make([]uint64, 1024)
1170+
for i := range buf {
1171+
buf[i] = rand.Uint64()
1172+
}
1173+
1174+
return FromDense(buf, false)
1175+
},
1176+
}
1177+
1178+
for _, inPlace := range []bool{true, false} {
1179+
for _, leftGen := range []generator{makeRunContainer, makeArrayContainer, makeBitmapContainer} {
1180+
for _, rightGen := range []generator{makeRunContainer, makeArrayContainer, makeBitmapContainer} {
1181+
b.Run(fmt.Sprintf("inPlace=%v/left=%s/right=%s", inPlace, leftGen.name, rightGen.name), func(b *testing.B) {
1182+
b.StopTimer()
1183+
serializedLefts := make([][]byte, 1000)
1184+
for i := range serializedLefts {
1185+
var err error
1186+
serializedLefts[i], err = leftGen.f().ToBytes()
1187+
if err != nil {
1188+
b.Fatal(err)
1189+
}
1190+
}
1191+
serializedRights := make([][]byte, 1000)
1192+
for i := range serializedRights {
1193+
var err error
1194+
serializedRights[i], err = rightGen.f().ToBytes()
1195+
if err != nil {
1196+
b.Fatal(err)
1197+
}
1198+
}
1199+
1200+
lefts := make([]*Bitmap, b.N)
1201+
for i := range lefts {
1202+
buf := serializedLefts[i%len(serializedLefts)]
1203+
lefts[i] = NewBitmap()
1204+
if _, err := lefts[i].FromBuffer(buf); err != nil {
1205+
b.Fatal(err)
1206+
}
1207+
lefts[i] = lefts[i].Clone()
1208+
}
1209+
rights := make([]*Bitmap, b.N)
1210+
for i := range rights {
1211+
buf := serializedRights[i%len(serializedRights)]
1212+
rights[i] = NewBitmap()
1213+
if _, err := rights[i].FromBuffer(buf); err != nil {
1214+
b.Fatal(err)
1215+
}
1216+
rights[i] = rights[i].Clone()
1217+
}
1218+
b.StartTimer()
1219+
for i := 0; i < b.N; i++ {
1220+
if inPlace {
1221+
lefts[i].AndNot(rights[i])
1222+
} else {
1223+
_ = AndNot(lefts[i], rights[i])
1224+
}
1225+
}
1226+
})
1227+
}
1228+
}
1229+
}
1230+
}

runcontainer.go

+1
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ import (
4747
// runContainer16 does run-length encoding of sets of
4848
// uint16 integers.
4949
type runContainer16 struct {
50+
// iv is a slice of sorted, non-overlapping, non-adjacent intervals.
// NOTE(review): arrayContainer.iandNotRun16 treats each interval as the
// inclusive range [start, start+length] - confirm against interval16.
5051
iv []interval16
5152
}
5253

0 commit comments

Comments
 (0)