// Source: bm25/parallel_test.go at main · crawlab-team/bm25 · GitHub

package bm25

import (
	"strings"
	"testing"
)

// TestGetScoresParallel checks GetScoresParallel against a two-document corpus:
// an empty query must produce an error, and single- and multi-term queries must
// produce the expected score for each document.
func TestGetScoresParallel(t *testing.T) {
	corpus := []string{"hello world", "this is a test"}
	tokenizer := func(s string) []string { return strings.Split(s, " ") }
	okapi, err := NewBM25Okapi(corpus, tokenizer, 1.2, 0.75, nil)
	if err != nil {
		// Nothing below is meaningful without a model, so stop immediately.
		t.Fatalf("NewBM25Okapi returned an unexpected error: %v", err)
	}

	t.Run("empty query", func(t *testing.T) {
		if _, err := okapi.GetScoresParallel([]string{}, okapi); err == nil {
			t.Errorf("Expected an error for an empty query, but got nil")
		}
	})

	// Scores are floating-point values, so compare within a small tolerance
	// instead of requiring bit-identical results.
	const tolerance = 1e-9

	cases := []struct {
		name     string
		query    []string
		expected []float64
	}{
		{"single-term query", []string{"hello"}, []float64{0.6931471805599453, 0.0}},
		{"multi-term query", []string{"this", "test"}, []float64{0.0, 1.3862943611198906}},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			scores, err := okapi.GetScoresParallel(tc.query, okapi)
			if err != nil {
				t.Fatalf("Unexpected error: %v", err)
			}
			// Abort on a length mismatch so the indexing below cannot go out of range.
			if len(scores) != len(tc.expected) {
				t.Fatalf("Expected %d scores, but got %d", len(tc.expected), len(scores))
			}
			for i, score := range scores {
				diff := score - tc.expected[i]
				if diff < 0 {
					diff = -diff
				}
				// Written as !(diff <= tolerance) so that a NaN score also fails.
				if !(diff <= tolerance) {
					t.Errorf("Expected score %v at index %d, but got %v", tc.expected[i], i, score)
				}
			}
		})
	}
}

// TestGetBatchScoresParallel checks GetBatchScoresParallel against a
// two-document corpus: an empty query, an empty document ID slice and
// out-of-range document IDs must each produce an error, while valid requests
// must produce the expected score for every requested document ID.
func TestGetBatchScoresParallel(t *testing.T) {
	corpus := []string{"hello world", "this is a test"}
	tokenizer := func(s string) []string { return strings.Split(s, " ") }
	okapi, err := NewBM25Okapi(corpus, tokenizer, 1.2, 0.75, nil)
	if err != nil {
		// Nothing below is meaningful without a model, so stop immediately.
		t.Fatalf("NewBM25Okapi returned an unexpected error: %v", err)
	}

	// Inputs that must be rejected; the case name doubles as the message subject.
	errorCases := []struct {
		name   string
		query  []string
		docIDs []int
	}{
		{"an empty query", []string{}, []int{0, 1}},
		{"an empty document IDs slice", []string{"hello"}, []int{}},
		{"invalid document IDs", []string{"hello"}, []int{-1, 2}},
	}
	for _, tc := range errorCases {
		t.Run(tc.name, func(t *testing.T) {
			if _, err := okapi.GetBatchScoresParallel(tc.query, tc.docIDs, okapi); err == nil {
				t.Errorf("Expected an error for %s, but got nil", tc.name)
			}
		})
	}

	// Scores are floating-point values, so compare within a small tolerance
	// instead of requiring bit-identical results.
	const tolerance = 1e-9

	scoreCases := []struct {
		name     string
		query    []string
		docIDs   []int
		expected []float64
	}{
		{"single-term query", []string{"hello"}, []int{0}, []float64{0.6931471805599453}},
		{"multi-term query", []string{"this", "test"}, []int{1}, []float64{1.3862943611198906}},
	}
	for _, tc := range scoreCases {
		t.Run(tc.name, func(t *testing.T) {
			scores, err := okapi.GetBatchScoresParallel(tc.query, tc.docIDs, okapi)
			if err != nil {
				t.Fatalf("Unexpected error: %v", err)
			}
			// Abort on a length mismatch so the indexing below cannot go out of range.
			if len(scores) != len(tc.expected) {
				t.Fatalf("Expected %d scores, but got %d", len(tc.expected), len(scores))
			}
			for i, score := range scores {
				diff := score - tc.expected[i]
				if diff < 0 {
					diff = -diff
				}
				// Written as !(diff <= tolerance) so that a NaN score also fails.
				if !(diff <= tolerance) {
					t.Errorf("Expected score %v at index %d, but got %v", tc.expected[i], i, score)
				}
			}
		})
	}
}

// TestGetTopNParallel checks GetTopNParallel against a two-document corpus:
// an empty query and n <= 0 must each produce an error, while valid requests
// must return the expected documents in order.
func TestGetTopNParallel(t *testing.T) {
	corpus := []string{"hello world", "this is a test"}
	tokenizer := func(s string) []string { return strings.Split(s, " ") }
	okapi, err := NewBM25Okapi(corpus, tokenizer, 1.2, 0.75, nil)
	if err != nil {
		// Nothing below is meaningful without a model, so stop immediately.
		t.Fatalf("NewBM25Okapi returned an unexpected error: %v", err)
	}

	// Inputs that must be rejected; the case name doubles as the message subject.
	errorCases := []struct {
		name  string
		query []string
		n     int
	}{
		{"an empty query", []string{}, 2},
		{"n <= 0", []string{"hello"}, 0},
	}
	for _, tc := range errorCases {
		t.Run(tc.name, func(t *testing.T) {
			if _, err := okapi.GetTopNParallel(tc.query, tc.n, okapi); err == nil {
				t.Errorf("Expected an error for %s, but got nil", tc.name)
			}
		})
	}

	topCases := []struct {
		name     string
		query    []string
		n        int
		expected []string
	}{
		{"single-term query", []string{"hello"}, 1, []string{"hello world"}},
		{"multi-term query", []string{"this", "test"}, 1, []string{"this is a test"}},
	}
	for _, tc := range topCases {
		t.Run(tc.name, func(t *testing.T) {
			topDocs, err := okapi.GetTopNParallel(tc.query, tc.n, okapi)
			if err != nil {
				t.Fatalf("Unexpected error: %v", err)
			}
			// Abort on a length mismatch so the indexing below cannot go out of range.
			if len(topDocs) != len(tc.expected) {
				t.Fatalf("Expected %d top documents, but got %d", len(tc.expected), len(topDocs))
			}
			for i, doc := range topDocs {
				if doc != tc.expected[i] {
					// %q makes empty strings and stray whitespace visible in the failure output.
					t.Errorf("Expected document %q at index %d, but got %q", tc.expected[i], i, doc)
				}
			}
		})
	}
}