Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
3 changes: 2 additions & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ jobs:
uses: astral-sh/ruff-action@v3
with:
version: ">=0.14.x"
args: "check"
args: "check --exclude=**/remote_thrift/infinity_thrift_rpc/**"

- name: Start builder container
if: ${{ !cancelled() && !failure() }}
Expand All @@ -123,6 +123,7 @@ jobs:
| grep -E '\.(cpp|h|hpp|cppm)$' \
| grep -v 'third_party/' \
| grep -v 'network/' \
| grep -v 'remote_thrift/infinity_thrift_rpc/' \
| grep -v 'parser/' || true)

if [ -n "$CHANGED_FILES" ]; then
Expand Down
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,8 @@ python/infinity_sdk.egg-info
sift_1m

# ignore all fvecs benchmark file
test/data/benchmark/*
#test/data/benchmark/*
test/data/benchmark/

# ignore valgrind output file
callgrind.out.*
Expand Down
26 changes: 23 additions & 3 deletions .hooks/pre-commit
Original file line number Diff line number Diff line change
Expand Up @@ -2,24 +2,44 @@

import subprocess


def get_files():
    """Return the paths staged for commit, as a list of ``bytes``.

    Runs ``git diff-index --cached --name-only -z HEAD``. With ``-z`` git
    terminates every path with a NUL byte and emits it verbatim, so a path
    that contains spaces comes back as a single entry instead of being torn
    apart by a whitespace split.

    Raises:
        subprocess.CalledProcessError: if git exits with a non-zero status.
    """
    output = subprocess.check_output(['git', 'diff-index', '--cached',
                                      '--name-only', '-z', 'HEAD'])
    # The NUL-terminated output ends with a separator; drop the empty tail.
    return [path for path in output.split(b'\0') if path]


def apply_code_style():
    """Run clang-format on each staged C/C++ source file and re-stage it.

    A staged path is processed when it ends in one of the recognised source
    suffixes and does not contain any of the excluded path fragments
    (third-party code and the listed parser sources).

    A non-zero exit from clang-format or ``git add`` is reported with a
    warning and the loop moves on to the next step, so one failing file does
    not abort the hook.
    """
    # Path fragments whose presence anywhere in the path excludes the file.
    excluded_fragments = (
        b'third_party',
        b'src/networker/third_party/infinity_thrift',
        b'src/parser/expression_parser.cpp',
        b'src/parser/expression_parser.h',
        b'src/parser/lexer.cpp',
        b'src/parser/lexer.h',
        b'src/parser/parser.cpp',
        b'src/parser/parser.h',
        b'src/parser/search_parser.cpp',
        b'src/parser/search_parser.h',
    )
    source_suffixes = (b'.c', b'.h', b'.hpp', b'.cpp', b'.cppm')

    files = [
        path for path in get_files()
        if path.endswith(source_suffixes)
        and not any(fragment in path for fragment in excluded_fragments)
    ]

    for f in files:
        print("Apply code style to: " + str(f))
        try:
            subprocess.check_output(['clang-format-20', '-i', f])
        except subprocess.CalledProcessError as e:
            print(f"⚠️ clang-format failed for {f}: {e}")
        # Re-stage even after a formatter failure, matching the guarded flow.
        try:
            subprocess.check_output(['git', 'add', f])
        except subprocess.CalledProcessError as e:
            print(f"⚠️ git add failed for {f}: {e}")


def main():
    """Hook entry point: format and re-stage the staged source files."""
    apply_code_style()


# Run the hook only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
105 changes: 54 additions & 51 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
# Require CMake 4.0.3 or newer and opt in to policy behavior up to 4.2.1.
cmake_minimum_required(VERSION 4.0.3...4.2.1)

# NOTE(review): CMP0167 OLD presumably keeps find_package(Boost) on the
# bundled FindBoost module - confirm this is still required.
cmake_policy(SET CMP0167 OLD)

set(CMAKE_SUPPRESS_DEVELOPER_WARNINGS TRUE)

# Enable the experimental `import std` support only for the CMake releases in
# this range. VERSION_* operators compare component-wise; the plain numeric
# GREATER_EQUAL/LESS_EQUAL operators do not understand "major.minor.patch".
if (CMAKE_VERSION VERSION_GREATER_EQUAL 4.0.3 AND CMAKE_VERSION VERSION_LESS_EQUAL 4.2.1)
    set(CMAKE_EXPERIMENTAL_CXX_IMPORT_STD "d0edc3af-4c50-42ea-a356-e2862fe7a444")
endif ()

Expand Down Expand Up @@ -52,10 +52,10 @@ if (CLANG_VERSION_STRING VERSION_GREATER_EQUAL 20)
message(STATUS "Building ${PROJECT_NAME} with CMake version: ${CMAKE_VERSION} On CLANG-${CLANG_VERSION_STRING}")

# add_compile_options(-ftime-trace)
# add_compile_options(-fmodule-header)
# add_compile_options(-fmodule-header)
add_link_options(-fuse-ld=mold)
# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Wno-unused-parameter -Wno-unused-private-field --rtlib=libgcc --unwindlib=libgcc")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Wno-unused-parameter -Wno-unused-private-field")

add_link_options(-L/usr/local/lib)
set(CMAKE_FIND_PACKAGE_SORT_ORDER NATURAL)
set(CMAKE_FIND_PACKAGE_SORT_DIRECTION DEC)
Expand All @@ -74,47 +74,48 @@ elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|amd64)$")
set(X86_64 TRUE)
endif ()

#strength, for image maintainer
set(GCC_SEARCH_ROOTS
/usr
/usr/local
)
##strength, for image maintainer
#set(GCC_SEARCH_ROOTS
# /usr
# /usr/local
#)

#if you encounter a problem, try commenting out the code below.
set(GCC15_SUFFIXES
# lib
lib/gcc/15
lib/gcc/x86_64-linux-gnu/15
lib/gcc/x86_64-pc-linux-gnu/15.1.0
lib64/gcc/x86_64-linux-gnu/15
lib64/gcc/15
lib64/gcc/x86_64-pc-linux-gnu/15.1.0
)

find_library(STDCXX15_STATIC
NAMES libstdc++.a
PATHS ${GCC_SEARCH_ROOTS}
PATH_SUFFIXES ${GCC15_SUFFIXES}
REQUIRED
NO_DEFAULT_PATH
)

find_library(STDCXX15EXP_STATIC
NAMES libstdc++exp.a
PATHS ${GCC_SEARCH_ROOTS}
PATH_SUFFIXES ${GCC15_SUFFIXES}
REQUIRED
NO_DEFAULT_PATH
)

get_filename_component(GCC15_LIB_DIR ${STDCXX15_STATIC} DIRECTORY)

set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -L${GCC15_LIB_DIR} -static-libstdc++ -static-libgcc")
#set(GCC15_SUFFIXES
# # lib
# lib/gcc/15
# lib/gcc/aarch64-linux-gnu/15
# lib/gcc/x86_64-linux-gnu/15
# lib/gcc/x86_64-pc-linux-gnu/15.1.0
# lib64/gcc/x86_64-linux-gnu/15
# lib64/gcc/15
# lib64/gcc/x86_64-pc-linux-gnu/15.1.0
#)

# find_library(GCC15_STATIC
# NAMES libgcc.a
# PATHS ${GCC_SEARCH_ROOTS}
# PATH_SUFFIXES ${GCC15_SUFFIXES}
# REQUIRED
# NO_DEFAULT_PATH
# )

#find_library(STDCXX15EXP_STATIC
# NAMES libstdc++exp.a
# PATHS ${GCC_SEARCH_ROOTS}
# PATH_SUFFIXES ${GCC15_SUFFIXES}
# REQUIRED
# NO_DEFAULT_PATH
#)

#get_filename_component(GCC15_LIB_DIR "${STDCXX15_STATIC}" DIRECTORY)

set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libstdc++ -static-libgcc")
########

find_package(absl CONFIG REQUIRED)
find_package(Arrow CONFIG REQUIRED)
find_package(Boost REQUIRED COMPONENTS asio)
find_package(Boost REQUIRED COMPONENTS asio thread)
find_package(CLI11 CONFIG REQUIRED)
#find_package(CURL REQUIRED) # wait for s3
#darts-clone # wait for fix
Expand Down Expand Up @@ -142,9 +143,19 @@ find_package(simdjson CONFIG REQUIRED)
#find_package(spdlog REQUIRED) # wait for fmt:11.2.0
if (ARM64)
    # Header-only interface target that carries the sse2neon include path.
    # NOTE(review): an earlier comment said this must come after highway and
    # before simdcomp - confirm the ordering constraint still applies.
    add_library(sse2neon INTERFACE)

    if (EXISTS "${VCPKG_INSTALLED_DIR}/include/sse2neon/sse2neon.h")
        # Prefer the copy under the vcpkg installed directory.
        # NOTE(review): vcpkg usually places headers under
        # <installed>/<triplet>/include - confirm this path is intended.
        set(SSE2NEON_INCLUDE_DIRS "${VCPKG_INSTALLED_DIR}/include")
        message(STATUS "Found sse2neon in vcpkg: ${SSE2NEON_INCLUDE_DIRS}")
        target_include_directories(sse2neon SYSTEM INTERFACE "${SSE2NEON_INCLUDE_DIRS}")
    else ()
        # Fall back to the default header search locations.
        find_path(SSE2NEON_INCLUDE_DIRS "sse2neon/sse2neon.h")
        if (SSE2NEON_INCLUDE_DIRS)
            message(STATUS "Found sse2neon: ${SSE2NEON_INCLUDE_DIRS}")
            # An INTERFACE library only accepts INTERFACE usage requirements.
            target_include_directories(sse2neon SYSTEM INTERFACE "${SSE2NEON_INCLUDE_DIRS}")
        else ()
            message(WARNING "sse2neon not found, proceeding without it.")
        endif ()
    endif ()
endif ()
find_package(tomlplusplus CONFIG REQUIRED)
#find_package(turbobase64 CONFIG REQUIRED)
Expand Down Expand Up @@ -318,16 +329,9 @@ elseif ("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
endif ()

set(CMAKE_DEBUG_POSTFIX "")

else ()
message(FATAL_ERROR "Only support CMake build type: Debug, RelWithDebInfo, and Release")
endif ()

# Project-wide warning policy: warnings are errors, with a few noisy
# diagnostics silenced. The flags are appended exactly once.
# NOTE(review): the former Clang 17 branch (extra -Wno-read-modules-implicitly)
# is dropped on the assumption that only Clang >= 20 is supported - confirm.
string(APPEND CMAKE_CXX_FLAGS " -Wall -Werror -Wno-asm-operand-widths -Wno-unused-command-line-argument -Wno-deprecated-declarations -Wextra -Wno-unused-parameter -Wno-unused-private-field -pthread -fcolor-diagnostics")

message(STATUS "C++ Compilation flags: ${CMAKE_CXX_FLAGS}")

Expand Down Expand Up @@ -386,7 +390,6 @@ if (X86_64)
else ()
add_definitions(-march=native)
endif ()

execute_process(
COMMAND bash -c "zgrep CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS=y /proc/config.gz 2>/dev/null; grep CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS=y /boot/config-$(uname -r) 2>/dev/null"
OUTPUT_VARIABLE HAVE_EFFICIENT_UNALIGNED_ACCESS
Expand Down
35 changes: 28 additions & 7 deletions benchmark/local_infinity/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ target_link_libraries(infinity_benchmark
jma
opencc
dl
Boost::asio
Boost::thread
thrift::thrift
# thriftnb::thriftnb
libevent::core
Expand All @@ -39,7 +41,8 @@ target_link_libraries(infinity_benchmark
CLI11::CLI11
magic_enum::magic_enum
roaring::roaring
${STDCXX15EXP_STATIC}
# ${STDCXX15EXP_STATIC}
libstdc++exp.a
)

target_link_directories(infinity_benchmark PUBLIC "${CMAKE_BINARY_DIR}/lib")
Expand Down Expand Up @@ -68,6 +71,8 @@ target_link_libraries(knn_import_benchmark
jma
opencc
dl
Boost::asio
Boost::thread
thrift::thrift
# thriftnb::thriftnb
libevent::core
Expand All @@ -92,7 +97,8 @@ target_link_libraries(knn_import_benchmark
CLI11::CLI11
magic_enum::magic_enum
roaring::roaring
${STDCXX15EXP_STATIC}
# ${STDCXX15EXP_STATIC}
libstdc++exp.a
)

target_link_directories(knn_import_benchmark PUBLIC "${CMAKE_BINARY_DIR}/lib")
Expand All @@ -119,6 +125,8 @@ target_link_libraries(knn_query_benchmark
jma
opencc
dl
Boost::asio
Boost::thread
Parquet::parquet_static
Arrow::arrow_static
thrift::thrift
Expand All @@ -143,7 +151,8 @@ target_link_libraries(knn_query_benchmark
CLI11::CLI11
magic_enum::magic_enum
roaring::roaring
${STDCXX15EXP_STATIC}
# ${STDCXX15EXP_STATIC}
libstdc++exp.a
)

target_link_directories(knn_query_benchmark PUBLIC "${CMAKE_BINARY_DIR}/lib")
Expand Down Expand Up @@ -171,6 +180,8 @@ target_link_libraries(fulltext_benchmark
jma
opencc
dl
Boost::asio
Boost::thread
thrift::thrift
# thriftnb::thriftnb
libevent::core
Expand All @@ -195,7 +206,8 @@ target_link_libraries(fulltext_benchmark
CLI11::CLI11
magic_enum::magic_enum
roaring::roaring
${STDCXX15EXP_STATIC}
# ${STDCXX15EXP_STATIC}
libstdc++exp.a
)

target_link_directories(fulltext_benchmark PUBLIC "${CMAKE_BINARY_DIR}/lib")
Expand All @@ -221,6 +233,8 @@ target_link_libraries(sparse_benchmark
jma
opencc
dl
Boost::asio
Boost::thread
Parquet::parquet_static
Arrow::arrow_static
${JEMALLOC_STATIC_LIB}
Expand All @@ -241,7 +255,8 @@ target_link_libraries(sparse_benchmark
CLI11::CLI11
magic_enum::magic_enum
roaring::roaring
${STDCXX15EXP_STATIC}
# ${STDCXX15EXP_STATIC}
libstdc++exp.a
)

target_link_directories(sparse_benchmark PUBLIC "${CMAKE_BINARY_DIR}/lib")
Expand All @@ -266,6 +281,8 @@ target_link_libraries(bmp_benchmark
jma
opencc
dl
Boost::asio
Boost::thread
thrift::thrift
# thriftnb::thriftnb
libevent::core
Expand All @@ -290,7 +307,8 @@ target_link_libraries(bmp_benchmark
CLI11::CLI11
magic_enum::magic_enum
roaring::roaring
${STDCXX15EXP_STATIC}
# ${STDCXX15EXP_STATIC}
libstdc++exp.a
)

target_link_directories(bmp_benchmark PUBLIC "${CMAKE_BINARY_DIR}/lib")
Expand All @@ -315,6 +333,8 @@ target_link_libraries(hnsw_benchmark
jma
opencc
dl
Boost::asio
Boost::thread
Parquet::parquet_static
Arrow::arrow_static
thrift::thrift
Expand All @@ -339,7 +359,8 @@ target_link_libraries(hnsw_benchmark
CLI11::CLI11
magic_enum::magic_enum
roaring::roaring
${STDCXX15EXP_STATIC}
# ${STDCXX15EXP_STATIC}
libstdc++exp.a
)

target_link_directories(hnsw_benchmark PUBLIC "${CMAKE_BINARY_DIR}/lib")
Expand Down
14 changes: 7 additions & 7 deletions benchmark/local_infinity/knn/knn_query_benchmark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -219,9 +219,9 @@ int main(int argc, char *argv[]) {
infinity
->Search(db_name, table_name, search_expr, nullptr, nullptr, nullptr, output_columns, nullptr, nullptr, nullptr, nullptr, false);
{
auto &cv = result.result_table_->GetDataBlockById(0)->column_vectors;
auto &cv = result.result_table_->GetDataBlockById(0)->column_vectors_;
auto &column = *cv[0];
auto data = reinterpret_cast<const RowID *>(column.data());
auto data = reinterpret_cast<const RowID *>(column.data().get());
auto cnt = column.Size();
for (size_t i = 0; i < cnt; ++i) {
query_results[query_idx].emplace_back(data[i].ToUint64());
Expand Down Expand Up @@ -286,12 +286,12 @@ int main(int argc, char *argv[]) {
std::shared_ptr<Infinity> infinity = Infinity::LocalConnect();
QueryResult cache_result = infinity->ShowCache();

auto &vectors = cache_result.result_table_->GetDataBlockById(0)->column_vectors;
auto &vectors = cache_result.result_table_->GetDataBlockById(0)->column_vectors_;
std::cout << "columns: " << vectors.size() << std::endl;
auto column1 = reinterpret_cast<const u64 *>(vectors[1]->data());
auto column2 = reinterpret_cast<const u64 *>(vectors[2]->data());
auto column3 = reinterpret_cast<const u64 *>(vectors[3]->data());
auto column4 = reinterpret_cast<const double *>(vectors[4]->data());
auto column1 = reinterpret_cast<const u64 *>(vectors[1]->data().get());
auto column2 = reinterpret_cast<const u64 *>(vectors[2]->data().get());
auto column3 = reinterpret_cast<const u64 *>(vectors[3]->data().get());
auto column4 = reinterpret_cast<const double *>(vectors[4]->data().get());

std::cout << "Cache db, items: " << column1[0] << ", hits: " << column2[0] << ", request: " << column3[0] << ", hit rate: " << column4[0]
<< std::endl;
Expand Down
Loading
Loading