Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
fdc3fa3
add tests
zpf121 Mar 9, 2026
d7de388
fix
zpf121 Mar 10, 2026
d07ba2b
Merge remote-tracking branch 'origin/main' into add_restart_tests
zpf121 Mar 10, 2026
c25fb9a
modify
zpf121 Mar 11, 2026
2b173e3
modify
zpf121 Mar 11, 2026
3bc77a4
Merge remote-tracking branch 'origin/main' into add_restart_tests
zpf121 Mar 11, 2026
2f3b1e2
modify
zpf121 Mar 11, 2026
d01a0db
modify
zpf121 Mar 12, 2026
00cd676
modify
zpf121 Mar 12, 2026
78f9b59
Merge remote-tracking branch 'origin/main' into add_restart_tests
zpf121 Mar 12, 2026
257495f
modify
zpf121 Mar 12, 2026
51e75ff
modify
zpf121 Mar 12, 2026
9a8a5c9
Merge remote-tracking branch 'origin/main' into add_restart_tests
zpf121 Mar 12, 2026
4f1f2b9
modify
zpf121 Mar 12, 2026
b165b1e
Merge remote-tracking branch 'origin/main' into add_restart_tests
zpf121 Mar 13, 2026
c51a7d5
modify
zpf121 Mar 13, 2026
006c85d
modify
zpf121 Mar 13, 2026
6d4955a
Merge remote-tracking branch 'origin/main' into add_restart_tests
zpf121 Mar 13, 2026
c272653
modify
zpf121 Mar 13, 2026
290a48c
modify
zpf121 Mar 13, 2026
3a152c9
modify
zpf121 Mar 13, 2026
7697b3e
modify
zpf121 Mar 13, 2026
045d980
modify
zpf121 Mar 13, 2026
d954685
modify
zpf121 Mar 13, 2026
1372d83
modify
zpf121 Mar 13, 2026
2eb0ccd
modify
zpf121 Mar 13, 2026
be060a7
modify
zpf121 Mar 13, 2026
139db5f
modify
zpf121 Mar 13, 2026
799e8b4
modify
zpf121 Mar 13, 2026
152127d
modify
zpf121 Mar 13, 2026
ff72032
Merge remote-tracking branch 'origin/main' into add_restart_tests
zpf121 Mar 13, 2026
f5720f6
modify
zpf121 Mar 13, 2026
1d7f032
modify
zpf121 Mar 13, 2026
a7361f4
modify
zpf121 Mar 13, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 33 additions & 12 deletions python/restart_test/restart_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import csv
import json
import numpy as np
import random
from infinity.common import SparseVector


Expand All @@ -14,7 +15,6 @@ def gen_factory():
def gen(insert: int):
for i in range(insert):
yield [i, [0.1, 0.2, 0.3, 0.4]]

return gen

def index():
Expand All @@ -38,7 +38,6 @@ def gen(insert: int):
yield [i, "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz"]
else:
yield [i, "test"]

return gen

def index():
Expand All @@ -56,7 +55,6 @@ def gen(insert: int):
yield [i, [0.1, 0.2, 0.3, 0.4]]
else:
yield [i, [[0.1, 0.2, 0.3, 0.4], [0.5, 0.6, 0.7, 0.8]]]

return gen


Expand Down Expand Up @@ -85,7 +83,6 @@ def gen(insert_n: int):
i += 1
if i >= insert_n:
break

return gen

def index():
Expand Down Expand Up @@ -165,7 +162,6 @@ def convert_sparse(sparse_data: dict[str, float]):
i += 1
if i >= insert_n:
break

return gen

def index():
Expand Down Expand Up @@ -207,9 +203,6 @@ def import_size() -> int:


class MultiIndexTypesGenerator:
DEFAULT_CSV_FILE = "test/data/csv/enwiki_embedding_plus_9999.csv"
DEFAULT_NUM_ROWS = 9999

def columns():
return {
"doctitle": {"type": "varchar"},
Expand All @@ -232,17 +225,45 @@ def index():
index.IndexInfo("category", index.IndexType.Secondary, {"cardinality": "low"}),
]

def import_file() -> str:
filepath = MultiIndexTypesGenerator.DEFAULT_CSV_FILE
K_NUM_ROWS = 9999
def generate_import_file() -> str:
filepath = f"test/data/csv/enwiki_embedding_plus_{MultiIndexTypesGenerator.K_NUM_ROWS}.csv"
if os.path.exists(filepath):
return filepath
print(f"CSV file not found: {filepath}. Generating...")
from tools.generate_wiki_embedding_plus import generate
generate()
generate(MultiIndexTypesGenerator.K_NUM_ROWS)
return filepath

def import_size() -> int:
return MultiIndexTypesGenerator.DEFAULT_NUM_ROWS
return MultiIndexTypesGenerator.K_NUM_ROWS

def import_options():
    """Return the options used when importing the generated data file.

    The file is imported as CSV with a tab character as the field delimiter.
    """
    options = dict(file_type="csv", delimiter="\t")
    return options

K_CATEGORIES = ["A", "B", "C", "D"]
K_TEXT_WORDS = ["apple", "banana", "cherry", "date"]

@staticmethod
def generate_random_row(num: int):
    """Build one randomly populated row, keyed by column name.

    Args:
        num: Row number. It is stored in the "num" column, embedded in the
            title and body strings, and selects the category by cycling
            through K_CATEGORIES (num modulo the number of categories).

    Returns:
        A dict with the keys "doctitle", "docdate", "body", "num",
        "category", "vector_col", "multi_vector_col" and "sparse_col".
        "vector_col" is a float32 array of 2048 random values,
        "multi_vector_col" is a float32 array of 2 rows by 1024 random
        values, and "sparse_col" is a SparseVector built from the sampled
        indices and integer values.
    """
    owner = MultiIndexTypesGenerator

    # NOTE: the order of the random draws below is kept fixed so that a
    # seeded `random` module yields the same row contents.
    dense = np.array([random.random() for _ in range(2048)], dtype=np.float32)
    multi = np.array(
        [[random.random() for _ in range(1024)] for _ in range(2)],
        dtype=np.float32,
    )

    # Keep each candidate position only when its draw exceeds 0.9.
    chosen = [pos for pos in range(1024) if random.random() > 0.9]
    if len(chosen) == 0:
        # Fall back to a fixed set of positions so the index list is never empty.
        chosen = [0, 1, 2]
    weights = [random.randint(1, 100) for _ in chosen]
    sparse = SparseVector(indices=chosen, values=weights)

    word = random.choice(owner.K_TEXT_WORDS)
    category = owner.K_CATEGORIES[num % len(owner.K_CATEGORIES)]

    row = {}
    row["doctitle"] = f"test_title_{num}"
    row["docdate"] = "01-JAN-2024 00:00:00.000"
    row["body"] = f"test_text_{num}_{word}"
    row["num"] = num
    row["category"] = category
    row["vector_col"] = dense
    row["multi_vector_col"] = multi
    row["sparse_col"] = sparse
    return row


if __name__ == "__main__":
Expand Down
Loading