enhance: optimize term expr performance (#45490)

sunby · web-flow · commit f1844c984180 · 2025-11-19T11:51:06.000+08:00
issue: #45641
pr: #45491

Signed-off-by: sunby <sunbingyi1992@gmail.com>
diff --git a/internal/core/src/common/Chunk.h b/internal/core/src/common/Chunk.h
@@ -113,7 +113,7 @@ class FixedWidthChunk : public Chunk {
                     std::unique_ptr<MmapFileRAII> mmap_file_raii = nullptr)
         : Chunk(row_nums, data, size, nullable, std::move(mmap_file_raii)),
           dim_(dim),
-          element_size_(element_size) {};
+          element_size_(element_size){};
 
     milvus::SpanBase
     Span() const {
diff --git a/internal/core/src/common/init_c.cpp b/internal/core/src/common/init_c.cpp
@@ -139,7 +139,6 @@ InitDefaultDeleteDumpBatchSize(int32_t val) {
         val);
 }
 
-
 void
 InitTrace(CTraceConfig* config) {
     auto traceConfig = milvus::tracer::TraceConfig{config->exporter,
diff --git a/internal/core/src/exec/expression/Element.h b/internal/core/src/exec/expression/Element.h
@@ -198,7 +198,7 @@ class FlatVectorElement : public MultiElement {
     In(const ValueType& value) const override {
         if (std::holds_alternative<T>(value)) {
             for (const auto& v : values_) {
-                if (v == value)
+                if (v == std::get<T>(value))
                     return true;
             }
         }
diff --git a/internal/core/src/exec/expression/Expr.h b/internal/core/src/exec/expression/Expr.h
@@ -908,7 +908,7 @@ class SegmentExpr : public Expr {
 
     template <typename T, typename FUNC, typename... ValTypes>
     VectorPtr
-    ProcessIndexChunks(FUNC func, ValTypes... values) {
+    ProcessIndexChunks(FUNC func, const ValTypes&... values) {
         typedef std::
             conditional_t<std::is_same_v<T, std::string_view>, std::string, T>
                 IndexInnerType;
diff --git a/internal/core/src/exec/expression/GISFunctionFilterExpr.h b/internal/core/src/exec/expression/GISFunctionFilterExpr.h
@@ -66,7 +66,6 @@ class PhyGISFunctionFilterExpr : public SegmentExpr {
         }
     }
 
-
  private:
     VectorPtr
     EvalForIndexSegment();
diff --git a/internal/core/src/exec/expression/TermExpr.cpp b/internal/core/src/exec/expression/TermExpr.cpp
@@ -810,29 +810,36 @@ PhyTermFilterExpr::ExecVisitorImplForIndex() {
         return nullptr;
     }
 
-    std::vector<IndexInnerType> vals;
-    for (auto& val : expr_->vals_) {
-        if constexpr (std::is_same_v<T, double>) {
-            if (val.has_int64_val()) {
-                // only json field will cast int to double because other fields are casted in proxy
-                vals.emplace_back(static_cast<double>(val.int64_val()));
-                continue;
+    if (!arg_inited_) {
+        std::vector<IndexInnerType> vals;
+        for (auto& val : expr_->vals_) {
+            if constexpr (std::is_same_v<T, double>) {
+                if (val.has_int64_val()) {
+                    // only json field will cast int to double because other fields are casted in proxy
+                    vals.emplace_back(static_cast<double>(val.int64_val()));
+                    continue;
+                }
             }
-        }
 
-        // Generic overflow handling for all types
-        bool overflowed = false;
-        auto converted_val = GetValueFromProtoWithOverflow<T>(val, overflowed);
-        if (!overflowed) {
-            vals.emplace_back(converted_val);
+            // Generic overflow handling for all types
+            bool overflowed = false;
+            auto converted_val =
+                GetValueFromProtoWithOverflow<T>(val, overflowed);
+            if (!overflowed) {
+                vals.emplace_back(converted_val);
+            }
         }
+        arg_set_ = std::make_shared<FlatVectorElement<IndexInnerType>>(vals);
+        arg_inited_ = true;
     }
     auto execute_sub_batch = [](Index* index_ptr,
                                 const std::vector<IndexInnerType>& vals) {
         TermIndexFunc<T> func;
         return func(index_ptr, vals.size(), vals.data());
     };
-    auto res = ProcessIndexChunks<T>(execute_sub_batch, vals);
+    auto args =
+        std::dynamic_pointer_cast<FlatVectorElement<IndexInnerType>>(arg_set_);
+    auto res = ProcessIndexChunks<T>(execute_sub_batch, args->values_);
     AssertInfo(res->size() == real_batch_size,
                "internal error: expr processed rows {} not equal "
                "expect batch size {}",
diff --git a/internal/core/src/exec/expression/UnaryExpr.cpp b/internal/core/src/exec/expression/UnaryExpr.cpp
@@ -575,7 +575,7 @@ PhyUnaryRangeFilterExpr::ExecArrayEqualForIndex(EvalCtx& context,
                 };
             } else {
                 auto size_per_chunk = segment_->size_per_chunk();
-                retrieve = [ size_per_chunk, this ](int64_t offset) -> auto{
+                retrieve = [ size_per_chunk, this ](int64_t offset) -> auto {
                     auto chunk_idx = offset / size_per_chunk;
                     auto chunk_offset = offset % size_per_chunk;
                     const auto& chunk =
diff --git a/internal/core/src/expr/ITypeExpr.h b/internal/core/src/expr/ITypeExpr.h
@@ -767,7 +767,7 @@ class GISFunctionFilterExpr : public ITypeFilterExpr {
         : column_(cloumn),
           op_(op),
           geometry_wkt_(geometry_wkt),
-          distance_(distance) {};
+          distance_(distance){};
     std::string
     ToString() const override {
         if (op_ == proto::plan::GISFunctionFilterExpr_GISOp_DWithin) {
diff --git a/internal/core/src/segcore/DeletedRecord.h b/internal/core/src/segcore/DeletedRecord.h
@@ -214,7 +214,8 @@ class DeletedRecord {
         SortedDeleteList::Accessor accessor(deleted_lists_);
         int total_size = accessor.size();
 
-        while (total_size - dumped_entry_count_.load() > DELETE_DUMP_BATCH_SIZE) {
+        while (total_size - dumped_entry_count_.load() >
+               DELETE_DUMP_BATCH_SIZE) {
             int32_t bitsize = 0;
             if constexpr (is_sealed) {
                 bitsize = sealed_row_count_;
@@ -232,11 +233,14 @@ class DeletedRecord {
                                              snapshots_.back().second.size());
             }
 
-            while (total_size - dumped_entry_count_.load() > DELETE_DUMP_BATCH_SIZE &&
+            while (total_size - dumped_entry_count_.load() >
+                       DELETE_DUMP_BATCH_SIZE &&
                    it != accessor.end()) {
                 Timestamp dump_ts = 0;
 
-                for (auto size = 0; size < DELETE_DUMP_BATCH_SIZE && it != accessor.end(); ++it, ++size) {
+                for (auto size = 0;
+                     size < DELETE_DUMP_BATCH_SIZE && it != accessor.end();
+                     ++it, ++size) {
                     bitmap.set(it->second);
                     dump_ts = it->first;
                 }
diff --git a/internal/core/src/storage/FileWriter.h b/internal/core/src/storage/FileWriter.h
@@ -294,8 +294,7 @@ class FileWriter {
     // for global configuration
     static WriteMode
         mode_;  // The write mode, which can be 'buffered' (default) or 'direct'.
-    static size_t
-        buffer_size_;
+    static size_t buffer_size_;
 
     // for rate limiter
     io::Priority priority_;
diff --git a/internal/core/unittest/test_utils/DataGen.h b/internal/core/unittest/test_utils/DataGen.h
@@ -343,8 +343,7 @@ GenerateRandomSparseFloatVector(size_t rows,
     return tensor;
 }
 
-inline std::string
-generateRandomPoint() {
+inline std::string generateRandomPoint() {
     return "POINT(" +
            std::to_string(static_cast<double>(rand()) / RAND_MAX * 360.0 -
                           180.0) +

Original file line number	Diff line number	Diff line change
`@@ -139,7 +139,6 @@ InitDefaultDeleteDumpBatchSize(int32_t val) {`
`139`	`139`	`val);`
`140`	`140`	`}`
`141`	`141`
`142`		`-`
`143`	`142`	`void`
`144`	`143`	`InitTrace(CTraceConfig* config) {`
`145`	`144`	`auto traceConfig = milvus::tracer::TraceConfig{config->exporter,`
Original file line number	Diff line number	Diff line change
`@@ -198,7 +198,7 @@ class FlatVectorElement : public MultiElement {`
`198`	`198`	`In(const ValueType& value) const override {`
`199`	`199`	`if (std::holds_alternative<T>(value)) {`
`200`	`200`	`for (const auto& v : values_) {`
`201`		`- if (v == value)`
	`201`	`+ if (v == std::get<T>(value))`
`202`	`202`	`return true;`
`203`	`203`	`}`
`204`	`204`	`}`
Original file line number	Diff line number	Diff line change
`@@ -66,7 +66,6 @@ class PhyGISFunctionFilterExpr : public SegmentExpr {`
`66`	`66`	`}`
`67`	`67`	`}`
`68`	`68`
`69`		`-`
`70`	`69`	`private:`
`71`	`70`	`VectorPtr`
`72`	`71`	`EvalForIndexSegment();`