Skip to content

need help on TestHLLTC_Merge_Complex #35

@imacks

Description

@imacks

I need a bit of help following TestHLLTC_Merge_Complex in hyperloglog_test.go:

Lines 374 and 384: why is ratio calculated against sk1?

Lines 391 and 395: I tried inserting the 500000 records into sk2 and then merging the empty sk3 into it, as in the source. This gives me an error ratio of about 1.18%. I then made another version where the 500000 records were inserted into sk3 instead, and sk3 was then merged into sk2. This gives me 6.52%. Shouldn't the two results be the same?

My code for ref:

// TestHLLTC_Merge_Complex checks that sketch estimates stay within a 2% error
// of the exact cardinality, both for individually filled sketches and after
// merging sketches whose key sets overlap (sk1 into sk2) or are disjoint
// (sk3 into sk2).
//
// NOTE(review): the arguments to newSketch (14, true) are presumably the
// precision and a "sparse" flag — confirm against newSketch's definition.
func TestHLLTC_Merge_Complex(t *testing.T) {
	const (
		flowCount   = 10000000 // number of "flow-N" keys inserted into sk1
		streamCount = 500000   // number of "stream-N" keys inserted into sk3
		maxErrPct   = 2        // largest tolerated estimate error, in percent
	)

	sk1, err := newSketch(14, true)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	sk2, err := newSketch(14, true)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	sk3, err := newSketch(14, true)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	// unique records every distinct key inserted so far, so len(unique) is the
	// exact cardinality that the merged estimates are compared against.
	unique := map[string]bool{}

	// check reports a test error when got deviates from exact by more than
	// maxErrPct percent, and always logs the comparison under label.
	check := func(label string, got, exact uint64) {
		t.Helper()
		ratio := 100 * estimateError(got, exact)
		if ratio > maxErrPct {
			t.Errorf("exact %d, got %d which is %.2f%% error", exact, got, ratio)
		}
		t.Logf("%s estimate %d; exact %d (%.2f%% err)", label, got, exact, ratio)
	}

	// sk1 will contain flow-[1..10000000]
	// sk2 will contain flow-[2,4,...10000000]
	//
	// The loop is bounded by i rather than by len(unique) <= flowCount, which
	// would insert one key more than documented (flow-10000001).
	for i := 1; i <= flowCount; i++ {
		str := fmt.Sprintf("flow-%d", i)
		sk1.Insert([]byte(str))
		if i%2 == 0 {
			sk2.Insert([]byte(str))
		}
		unique[str] = true
	}

	// sk1 holds every flow key.
	check("sk1", uint64(sk1.Estimate()), uint64(len(unique)))

	// sk2 holds only the even-numbered flow keys, i.e. half of them.
	check("sk2", uint64(sk2.Estimate()), uint64(len(unique))/2)

	// sk2 merge sk1: sk2's keys are a subset of sk1's, so the merged sketch
	// should estimate the full flow set. A failed merge makes the following
	// checks meaningless, so abort instead of continuing.
	if err := sk2.Merge(sk1); err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	check("sk2 merge sk1", uint64(sk2.Estimate()), uint64(len(unique)))

	for i := 1; i <= streamCount; i++ {
		str := fmt.Sprintf("stream-%d", i)
		sk3.Insert([]byte(str)) // also try sk2.Insert(...)
		unique[str] = true
	}
	// Optional check of sk3 on its own. It is only valid while the stream keys
	// are inserted into sk3 above (not when experimenting with sk2.Insert):
	//
	//	check("sk3", uint64(sk3.Estimate()), uint64(streamCount))

	// sk2 merge sk3: the stream keys are disjoint from the flow keys, so the
	// exact cardinality is the total number of distinct keys seen.
	if err := sk2.Merge(sk3); err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	check("sk2 merge sk3", uint64(sk2.Estimate()), uint64(len(unique)))
}

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions