Skip to content

Commit a21b4b2

Browse files
authored
Merge pull request #470 from RoaringBitmap/issue469
fixes Issue 469 (various Validate bugs)
2 parents 461d7ec + e5a4a6f commit a21b4b2

9 files changed

+85
-40
lines changed

roaring.go

+3
Original file line numberDiff line numberDiff line change
@@ -2133,6 +2133,9 @@ func (rb *Bitmap) Stats() Statistics {
21332133
return stats
21342134
}
21352135

2136+
// Validate checks if the bitmap is internally consistent.
2137+
// You may call it after deserialization to check that the bitmap is valid.
2138+
// This function returns an error if the bitmap is invalid, nil otherwise.
21362139
func (rb *Bitmap) Validate() error {
21372140
return rb.highlowcontainer.validate()
21382141
}

roaring64/bsi64_test.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -107,8 +107,8 @@ func TestSetAndGetBigTimestamp(t *testing.T) {
107107
// Recover and check the known timestamp
108108
bv, _ := bsi.GetBigValue(1)
109109
seconds, nanoseconds := bigIntToSecondsAndNanos(bv)
110-
ts := time.Unix(seconds, int64(nanoseconds))
111-
assert.Equal(t, "3024-10-23T16:55:46.763295273Z", ts.Format(time.RFC3339Nano))
110+
assert.Equal(t, seconds, int64(33286611346))
111+
assert.Equal(t, nanoseconds, int32(763295273))
112112
assert.Equal(t, 67, bsi.BitCount())
113113
}
114114

roaring64/roaring64_test.go

-2
Original file line numberDiff line numberDiff line change
@@ -2015,8 +2015,6 @@ func Test32As64(t *testing.T) {
20152015
func TestRoaringArray64Validation(t *testing.T) {
20162016
a := roaringArray64{}
20172017

2018-
assert.ErrorIs(t, a.validate(), ErrEmptyKeys)
2019-
20202018
a.keys = append(a.keys, uint32(3), uint32(1))
20212019
assert.ErrorIs(t, a.validate(), ErrKeySortOrder)
20222020
a.clear()

roaring64/roaringarray64.go

+3-6
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@ type roaringArray64 struct {
1414
}
1515

1616
var (
17-
ErrEmptyKeys = errors.New("keys were empty")
1817
ErrKeySortOrder = errors.New("keys were out of order")
1918
ErrCardinalityConstraint = errors.New("size of arrays was not coherent")
2019
)
@@ -437,10 +436,6 @@ func (ra *roaringArray64) checkKeysSorted() bool {
437436
// validate checks the referential integrity
438437
// ensures len(keys) == len(containers), recurses and checks each container type
439438
func (ra *roaringArray64) validate() error {
440-
if len(ra.keys) == 0 {
441-
return ErrEmptyKeys
442-
}
443-
444439
if !ra.checkKeysSorted() {
445440
return ErrKeySortOrder
446441
}
@@ -454,11 +449,13 @@ func (ra *roaringArray64) validate() error {
454449
}
455450

456451
for _, maps := range ra.containers {
457-
458452
err := maps.Validate()
459453
if err != nil {
460454
return err
461455
}
456+
if maps.IsEmpty() {
457+
return errors.New("empty container")
458+
}
462459
}
463460

464461
return nil

roaring_test.go

+58-6
Original file line numberDiff line numberDiff line change
@@ -2732,8 +2732,6 @@ func TestFromBitSet(t *testing.T) {
27322732
func TestRoaringArrayValidation(t *testing.T) {
27332733
a := newRoaringArray()
27342734

2735-
assert.ErrorIs(t, a.validate(), ErrEmptyKeys)
2736-
27372735
a.keys = append(a.keys, uint16(3), uint16(1))
27382736
assert.ErrorIs(t, a.validate(), ErrKeySortOrder)
27392737
a.clear()
@@ -2744,7 +2742,7 @@ func TestRoaringArrayValidation(t *testing.T) {
27442742
a.containers = append(a.containers, &runContainer16{}, &runContainer16{}, &runContainer16{})
27452743
assert.ErrorIs(t, a.validate(), ErrCardinalityConstraint)
27462744
a.needCopyOnWrite = append(a.needCopyOnWrite, true, false, true)
2747-
assert.Errorf(t, a.validate(), "zero intervals")
2745+
assert.ErrorIs(t, a.validate(), ErrRunIntervalsEmpty)
27482746
}
27492747

27502748
func TestBitMapValidation(t *testing.T) {
@@ -2805,11 +2803,9 @@ func TestBitMapValidationFromDeserialization(t *testing.T) {
28052803
bm.AddRange(100, 110)
28062804
},
28072805
corruptor: func(s []byte) {
2808-
// 13 is the length of the run
2809-
// Setting to zero causes an invalid run
28102806
s[13] = 0
28112807
},
2812-
err: ErrRunIntervalLength,
2808+
err: ErrRunIntervalSize,
28132809
},
28142810
{
28152811
name: "Creates Interval Overlap",
@@ -3310,3 +3306,59 @@ func TestIssue467CaseLarge(t *testing.T) {
33103306
b.RunOptimize()
33113307
require.NoError(t, b.Validate())
33123308
}
3309+
3310+
func TestValidateEmpty(t *testing.T) {
3311+
require.NoError(t, New().Validate())
3312+
}
3313+
3314+
func TestValidate469(t *testing.T) {
3315+
b := New()
3316+
b.RemoveRange(0, 180)
3317+
b.AddRange(0, 180)
3318+
require.NoError(t, b.Validate())
3319+
b.RemoveRange(180, 217)
3320+
b.AddRange(180, 217)
3321+
require.NoError(t, b.Validate())
3322+
b.RemoveRange(217, 2394)
3323+
b.RemoveRange(2394, 2427)
3324+
b.AddRange(2394, 2427)
3325+
require.NoError(t, b.Validate())
3326+
b.RemoveRange(2427, 2428)
3327+
b.AddRange(2427, 2428)
3328+
require.NoError(t, b.Validate())
3329+
b.RemoveRange(2428, 3345)
3330+
require.NoError(t, b.Validate())
3331+
b.RemoveRange(3345, 3346)
3332+
require.NoError(t, b.Validate())
3333+
b.RemoveRange(3346, 3597)
3334+
require.NoError(t, b.Validate())
3335+
b.RemoveRange(3597, 3815)
3336+
require.NoError(t, b.Validate())
3337+
b.RemoveRange(3815, 3816)
3338+
require.NoError(t, b.Validate())
3339+
b.AddRange(3815, 3816)
3340+
require.NoError(t, b.Validate())
3341+
b.RemoveRange(3816, 3856)
3342+
b.RemoveRange(3856, 4067)
3343+
b.RemoveRange(4067, 4069)
3344+
b.RemoveRange(4069, 4071)
3345+
b.RemoveRange(4071, 4095)
3346+
b.RemoveRange(4095, 4096)
3347+
require.NoError(t, b.Validate())
3348+
b.RunOptimize()
3349+
require.False(t, b.IsEmpty())
3350+
require.NoError(t, b.Validate())
3351+
}
3352+
3353+
func TestValidateFromV1(t *testing.T) {
3354+
v1 := New()
3355+
for i := 0; i <= 2; i++ {
3356+
v1.Add(uint32(i))
3357+
}
3358+
v1.RunOptimize()
3359+
b, err := v1.MarshalBinary()
3360+
require.NoError(t, err)
3361+
v2 := New()
3362+
require.NoError(t, v2.UnmarshalBinary(b))
3363+
require.NoError(t, v2.Validate())
3364+
}

roaringarray.go

-5
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,6 @@ const (
9090
)
9191

9292
var (
93-
ErrEmptyKeys = errors.New("keys were empty")
9493
ErrKeySortOrder = errors.New("keys were out of order")
9594
ErrCardinalityConstraint = errors.New("size of arrays was not coherent")
9695
)
@@ -798,10 +797,6 @@ func (ra *roaringArray) checkKeysSorted() bool {
798797
// validate checks the referential integrity
799798
// ensures len(keys) == len(containers), recurses and checks each container type
800799
func (ra *roaringArray) validate() error {
801-
if len(ra.keys) == 0 {
802-
return ErrEmptyKeys
803-
}
804-
805800
if !ra.checkKeysSorted() {
806801
return ErrKeySortOrder
807802
}

runcontainer.go

+17-14
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,6 @@ type interval16 struct {
6262
var (
6363
ErrRunIntervalsEmpty = errors.New("run contained no interval")
6464
ErrRunNonSorted = errors.New("runs were not sorted")
65-
ErrRunIntervalLength = errors.New("interval had zero length")
6665
ErrRunIntervalEqual = errors.New("intervals were equal")
6766
ErrRunIntervalOverlap = errors.New("intervals overlapped or were continguous")
6867
ErrRunIntervalSize = errors.New("too many intervals relative to data")
@@ -2757,10 +2756,9 @@ func (rc *runContainer16) validate() error {
27572756

27582757
intervalsSum := 0
27592758
for outeridx := range rc.iv {
2760-
2761-
if rc.iv[outeridx].length == 0 {
2762-
return ErrRunIntervalLength
2763-
}
2759+
// The length being stored is the actual length - 1.
2760+
// So we need to add 1 to get the actual length.
2761+
// It is not possible to have a run with length 0.
27642762

27652763
outerInterval := rc.iv[outeridx]
27662764

@@ -2794,15 +2792,20 @@ func (rc *runContainer16) validate() error {
27942792
check that the number of runs < (number of distinct values) / 2
27952793
(otherwise you could use an array container)
27962794
*/
2797-
if MaxIntervalsSum <= intervalsSum {
2798-
if !(len(rc.iv) < MaxNumIntervals) {
2799-
return ErrRunIntervalSize
2800-
}
2801-
} else {
2802-
if !(len(rc.iv) < (intervalsSum / 2)) {
2803-
return ErrRunIntervalSize
2804-
}
2805-
}
28062795

2796+
sizeAsRunContainer := runContainer16SerializedSizeInBytes(len(rc.iv))
2797+
sizeAsBitmapContainer := bitmapContainerSizeInBytes()
2798+
sizeAsArrayContainer := arrayContainerSizeInBytes(intervalsSum)
2799+
fmt.Println(sizeAsRunContainer, sizeAsBitmapContainer, sizeAsArrayContainer)
2800+
// this is always ok:
2801+
if sizeAsRunContainer < minOfInt(sizeAsBitmapContainer, sizeAsArrayContainer) {
2802+
return nil
2803+
}
2804+
if sizeAsRunContainer >= sizeAsBitmapContainer {
2805+
return ErrRunIntervalSize
2806+
}
2807+
if sizeAsRunContainer >= sizeAsArrayContainer {
2808+
return ErrRunIntervalSize
2809+
}
28072810
return nil
28082811
}

runcontainer_test.go

+1-2
Original file line numberDiff line numberDiff line change
@@ -2588,13 +2588,12 @@ func TestIntervalValidationFailing(t *testing.T) {
25882588
start := -4
25892589
for i := 0; i < MaxNumIntervals; i++ {
25902590
start += 4
2591-
end := start + 2
2591+
end := start + 1
25922592
a := newInterval16Range(uint16(start), uint16(end))
25932593
rc.iv = append(rc.iv, a)
25942594

25952595
}
25962596
assert.ErrorIs(t, rc.validate(), ErrRunIntervalSize)
2597-
25982597
// too many small runs, use array
25992598
rc = &runContainer16{}
26002599
start = -3

serialization_frozen_test.go

+1-3
Original file line numberDiff line numberDiff line change
@@ -171,11 +171,9 @@ func TestBitMapValidationFromFrozen(t *testing.T) {
171171
bm.AddRange(100, 110)
172172
},
173173
corruptor: func(s []byte) {
174-
// 13 is the length of the run
175-
// Setting to zero causes an invalid run
176174
s[2] = 0
177175
},
178-
err: ErrRunIntervalLength,
176+
err: ErrRunIntervalSize,
179177
},
180178
{
181179
name: "Creates Interval Overlap",

0 commit comments

Comments
 (0)