-
Notifications
You must be signed in to change notification settings - Fork 79
Open
Description
I need a bit of help following TestHLLTC_Merge_Complex in hyperloglog_test.go:
Lines 374 and 384: why is ratio calculated against sk1?
Lines 391 and 395: I tried inserting the 500000 records into sk2 and then merge with empty sk3, like in the source. This gives me a ratio of about 1.18%. I made another version where the 500000 records were inserted into sk3, and then merge with sk2. This gets me 6.52%. Shouldn't they be the same?
My code for ref:
func TestHLLTC_Merge_Complex(t *testing.T) {
sk1, err := newSketch(14, true)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
sk2, err := newSketch(14, true)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
sk3, err := newSketch(14, true)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
// sk1 will contain flow-[1..10000000]
// sk2 will contain flow-[2,4,...10000000]
unique := map[string]bool{}
for i := 1; len(unique) <= 10000000; i++ {
str := fmt.Sprintf("flow-%d", i)
sk1.Insert([]byte(str))
if i%2 == 0 {
sk2.Insert([]byte(str))
}
unique[str] = true
}
// sk1
exact1 := uint64(len(unique))
res1 := uint64(sk1.Estimate())
ratio := 100*estimateError(res1, exact1)
if ratio > 2 {
t.Errorf("exact %d, got %d which is %.2f%% error", exact1, res1, ratio)
}
t.Logf("sk1 estimate %d; exact %d (%.2f%% err)", res1, exact1, ratio)
// sk2
exact2 := uint64(len(unique))/2
res2 := uint64(sk2.Estimate())
ratio = 100*estimateError(res2, exact2)
if ratio > 2 {
t.Errorf("exact %d, got %d which is %.2f%% error", exact2, res2, ratio)
}
t.Logf("sk2 estimate %d; exact %d (%.2f%% err)", res2, exact2, ratio)
// sk2 merge sk1
if err := sk2.Merge(sk1); err != nil {
t.Errorf("unexpected error: %v", err)
}
exact2 = uint64(len(unique))
res2 = uint64(sk2.Estimate())
ratio = 100*estimateError(res2, exact2)
if ratio > 2 {
t.Errorf("exact %d, got %d which is %.2f%% error", exact2, res2, ratio)
}
t.Logf("sk2 merge sk1 estimate %d; exact %d (%.2f%% err)", res2, exact2, ratio)
for i := 1; i <= 500000; i++ {
str := fmt.Sprintf("stream-%d", i)
sk3.Insert([]byte(str)) // also try sk2.Insert(...)
unique[str] = true
}
/*
exact3 := uint64(500000)
res3 := uint64(sk3.Estimate())
ratio = 100*estimateError(res3, exact3)
if ratio > 2 {
t.Errorf("exact %d, got %d which is %.2f%% error", exact3, res3, ratio)
}
t.Logf("sk3 estimate %d; exact %d (%.2f%% err)", res3, exact3, ratio)
*/
// sk2 merge sk3
if err := sk2.Merge(sk3); err != nil {
t.Fatalf("unexpected error: %v", err)
}
exact2 = uint64(len(unique))
res2 = uint64(sk2.Estimate())
ratio = 100*estimateError(res2, exact2)
if ratio > 2 {
t.Errorf("exact %d, got %d which is %.2f%% error", exact2, res2, ratio)
}
t.Logf("sk2 merge sk3 estimate %d; exact %d (%.2f%% err)", res2, exact2, ratio)
}Metadata
Metadata
Assignees
Labels
No labels