Skip to content

Commit 6839dd5

Browse files
csun5285 and claude committed
[refactor](inverted-index) replace void* query_value with typed param interface
Production query path no longer carries a const void* + reinterpret_cast through InvertedIndexReader::query / try_query.

Three classes with distinct responsibilities replace the old conflated InvertedIndexQueryParamFactory:

* InvertedIndexQueryParam — abstract value interface; readers pull the value via typed virtuals (get_string / encode_ascending / encode_min_ascending / encode_max_ascending).
* TypedInvertedIndexQueryParam<PT> — concrete typed value; the numeric/date/decimal/IP specialisation implements the encode_* virtuals using type_limit<>; the string specialisation implements get_string only.
* InvertedIndexQueryParamFactory — static-only namespace class that maps FE values onto the correct TypedInvertedIndexQueryParam<PT>; no instances, no inheritance.

BkdIndexReader::construct_bkd_query_value drops the std::vector<char> tmp scratch buffer and the _type_info->set_to_min/max calls used to synthesize +/-infinity sentinels for half-bounded range queries. The sentinel is now produced directly by the typed query value (encode_min_ascending / encode_max_ascending), so only inverted-index supported types ever need to know how to emit a min/max.

With BKD no longer a consumer, the entire TypeInfo::set_to_min/max API surface is removed: TypeInfo virtuals, ScalarTypeInfo storage, List/Map/Struct DCHECK-fail overrides, every FieldTypeTraits<...> specialization, the OLAP_FIELD_TYPE_CHAR static function pointer in types.cpp, Field::set_to_min/max wrappers, and the CharField/VarcharField/StringField overrides. Corresponding storage_types_test cases are removed.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 8ca1b90 commit 6839dd5

27 files changed

Lines changed: 1109 additions & 586 deletions

be/src/core/type_limit.h

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,10 @@
1717

1818
#pragma once
1919

20+
#include "core/decimal12.h"
2021
#include "core/extended_types.h"
2122
#include "core/string_ref.h"
23+
#include "core/uint24.h"
2224
#include "core/value/decimalv2_value.h"
2325
#include "core/value/timestamptz_value.h"
2426

@@ -54,6 +56,24 @@ struct type_limit<DecimalV2Value> {
5456
static DecimalV2Value max() { return DecimalV2Value::get_max_decimal(); }
5557
};
5658

59+
// std::numeric_limits is not specialised for these custom storage types, so
60+
// the generic type_limit would return T() = zero for both min and max,
61+
// silently breaking BKD half-bounded range queries.
62+
63+
// DECIMALV2 storage. Smallest/largest representable DecimalV2 values (18 integer digits, 9 fractional digits).
64+
template <>
65+
struct type_limit<decimal12_t> {
66+
static decimal12_t min() { return decimal12_t {-999999999999999999LL, -999999999}; }
67+
static decimal12_t max() { return decimal12_t {+999999999999999999LL, +999999999}; }
68+
};
69+
70+
// DATE storage. Packed as `year<<9 | month<<5 | day`: 33=0000-01-01, 5119903=9999-12-31.
71+
template <>
72+
struct type_limit<uint24_t> {
73+
static uint24_t min() { return uint24_t(33); }
74+
static uint24_t max() { return uint24_t(5119903); }
75+
};
76+
5777
template <>
5878
struct type_limit<Decimal32> {
5979
static Decimal32 max() { return 999999999; }

be/src/exprs/function/array/function_array_index.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
#include "core/types.h"
4646
#include "exprs/function/function.h"
4747
#include "storage/index/index_reader_helper.h"
48+
#include "storage/index/inverted/inverted_index_query_param.h"
4849
#include "storage/index/inverted/inverted_index_query_type.h"
4950
#include "storage/index/inverted/inverted_index_reader.h"
5051
#include "storage/predicate/column_predicate.h"
@@ -164,13 +165,13 @@ class FunctionArrayIndex : public IFunction {
164165
RETURN_IF_ERROR(iter->read_null_bitmap(&null_bitmap_cache_handle));
165166
null_bitmap = null_bitmap_cache_handle.get_bitmap();
166167
}
167-
std::unique_ptr<InvertedIndexQueryParamFactory> query_param = nullptr;
168+
std::unique_ptr<InvertedIndexQueryParam> query_param = nullptr;
168169
RETURN_IF_ERROR(InvertedIndexQueryParamFactory::create_query_value(param_type, &param_value,
169170
query_param));
170171
InvertedIndexParam param;
171172
param.column_name = data_type_with_name.first;
172173
param.column_type = data_type_with_name.second;
173-
param.query_value = query_param->get_value();
174+
param.query_value = std::move(query_param);
174175
param.query_type = segment_v2::InvertedIndexQueryType::EQUAL_QUERY;
175176
param.num_rows = num_rows;
176177
param.roaring = std::make_shared<roaring::Roaring>();

be/src/exprs/function/array/function_arrays_overlap.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -246,7 +246,7 @@ class FunctionArraysOverlap : public IFunction {
246246
RETURN_IF_ERROR(iter->read_null_bitmap(&null_bitmap_cache_handle));
247247
null_bitmap = null_bitmap_cache_handle.get_bitmap();
248248
}
249-
std::unique_ptr<InvertedIndexQueryParamFactory> query_param = nullptr;
249+
std::unique_ptr<InvertedIndexQueryParam> query_param = nullptr;
250250
const Array& query_val = param_value.get<TYPE_ARRAY>();
251251

252252
InvertedIndexParam param;
@@ -262,7 +262,7 @@ class FunctionArraysOverlap : public IFunction {
262262
}
263263
RETURN_IF_ERROR(InvertedIndexQueryParamFactory::create_query_value(
264264
nested_param_type, &nested_query_val, query_param));
265-
param.query_value = query_param->get_value();
265+
param.query_value = std::move(query_param);
266266
param.roaring = std::make_shared<roaring::Roaring>();
267267
param.analyzer_ctx = analyzer_ctx;
268268
RETURN_IF_ERROR(iter->read_from_index(segment_v2::IndexParam {&param}));

be/src/exprs/function/function_ip.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
#include "exprs/function/function.h"
4949
#include "exprs/function/function_helpers.h"
5050
#include "storage/index/index_reader_helper.h"
51+
#include "storage/index/inverted/inverted_index_query_param.h"
5152

5253
namespace doris {
5354

@@ -708,7 +709,7 @@ class FunctionIsIPAddressInRange : public IFunction {
708709
std::shared_ptr<roaring::Roaring> null_bitmap = std::make_shared<roaring::Roaring>();
709710

710711
auto param_type = data_type_with_name.second->get_primitive_type();
711-
std::unique_ptr<segment_v2::InvertedIndexQueryParamFactory> query_param = nullptr;
712+
std::unique_ptr<segment_v2::InvertedIndexQueryParam> query_param = nullptr;
712713

713714
// >= min ip
714715
RETURN_IF_ERROR(segment_v2::InvertedIndexQueryParamFactory::create_query_value(
@@ -717,7 +718,7 @@ class FunctionIsIPAddressInRange : public IFunction {
717718
min_param.column_name = data_type_with_name.first;
718719
min_param.column_type = data_type_with_name.second;
719720
min_param.query_type = segment_v2::InvertedIndexQueryType::GREATER_EQUAL_QUERY;
720-
min_param.query_value = query_param->get_value();
721+
min_param.query_value = std::move(query_param);
721722
min_param.num_rows = num_rows;
722723
min_param.roaring = std::make_shared<roaring::Roaring>();
723724
RETURN_IF_ERROR(iter->read_from_index(&min_param));
@@ -729,7 +730,7 @@ class FunctionIsIPAddressInRange : public IFunction {
729730
max_param.column_name = data_type_with_name.first;
730731
max_param.column_type = data_type_with_name.second;
731732
max_param.query_type = segment_v2::InvertedIndexQueryType::LESS_EQUAL_QUERY;
732-
max_param.query_value = query_param->get_value();
733+
max_param.query_value = std::move(query_param);
733734
max_param.num_rows = num_rows;
734735
max_param.roaring = std::make_shared<roaring::Roaring>();
735736
RETURN_IF_ERROR(iter->read_from_index(&max_param));

be/src/exprs/function/function_multi_match.cpp

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
#include "exprs/function/simple_function_factory.h"
2929
#include "exprs/vslot_ref.h"
3030
#include "io/fs/file_reader.h"
31+
#include "storage/index/inverted/inverted_index_query_param.h"
3132
#include "storage/index/inverted/query/phrase_prefix_query.h"
3233
#include "storage/segment/segment_iterator.h"
3334

@@ -79,13 +80,15 @@ Status FunctionMultiMatch::evaluate_inverted_index(
7980
return Status::Error<ErrorCode::INDEX_INVALID_PARAMETERS>(
8081
"arguments for multi_match must be string");
8182
}
82-
// Must convert StringRef to std::string because downstream readers
83-
// (e.g. FullTextIndexReader::query) reinterpret_cast query_value as std::string*.
84-
std::string query_str(query_str_ref.data, query_str_ref.size);
83+
// Wrap the query string in a typed InvertedIndexQueryParam so the reader
84+
// can pull it via get_string() instead of relying on a void*->std::string
85+
// reinterpret_cast.
86+
auto query_param = segment_v2::TypedInvertedIndexQueryParam<TYPE_STRING>::create_unique();
87+
query_param->set_value(&query_str_ref);
8588

8689
// search
8790
InvertedIndexParam param;
88-
param.query_value = &query_str;
91+
param.query_value = std::move(query_param);
8992
param.query_type = query_type;
9093
param.num_rows = num_rows;
9194
for (size_t i = 0; i < data_type_with_names.size(); i++) {

be/src/exprs/function/functions_comparison.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
#include "exprs/function/function_helpers.h"
4141
#include "exprs/function/functions_logical.h"
4242
#include "storage/index/index_reader_helper.h"
43+
#include "storage/index/inverted/inverted_index_query_param.h"
4344

4445
namespace doris {
4546

@@ -487,14 +488,14 @@ class FunctionComparison : public IFunction {
487488
return Status::OK();
488489
}
489490
auto param_type = arguments[0].type->get_primitive_type();
490-
std::unique_ptr<segment_v2::InvertedIndexQueryParamFactory> query_param = nullptr;
491+
std::unique_ptr<segment_v2::InvertedIndexQueryParam> query_param = nullptr;
491492
RETURN_IF_ERROR(segment_v2::InvertedIndexQueryParamFactory::create_query_value(
492493
param_type, &param_value, query_param));
493494

494495
segment_v2::InvertedIndexParam param;
495496
param.column_name = data_type_with_name.first;
496497
param.column_type = data_type_with_name.second;
497-
param.query_value = query_param->get_value();
498+
param.query_value = std::move(query_param);
498499
param.query_type = query_type;
499500
param.num_rows = num_rows;
500501
param.roaring = std::make_shared<roaring::Roaring>();

be/src/exprs/function/in.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
#include "exprs/function_context.h"
4747
#include "exprs/hybrid_set.h"
4848
#include "storage/index/index_reader_helper.h"
49+
#include "storage/index/inverted/inverted_index_query_param.h"
4950

5051
namespace doris {
5152

@@ -170,14 +171,14 @@ class FunctionIn : public IFunction {
170171
*roaring |= *null_bitmap;
171172
continue;
172173
}
173-
std::unique_ptr<InvertedIndexQueryParamFactory> query_param = nullptr;
174+
std::unique_ptr<InvertedIndexQueryParam> query_param = nullptr;
174175
RETURN_IF_ERROR(InvertedIndexQueryParamFactory::create_query_value(
175176
param_type, &param_value, query_param));
176177
InvertedIndexQueryType query_type = InvertedIndexQueryType::EQUAL_QUERY;
177178
segment_v2::InvertedIndexParam param;
178179
param.column_name = data_type_with_name.first;
179180
param.column_type = data_type_with_name.second;
180-
param.query_value = query_param->get_value();
181+
param.query_value = std::move(query_param);
181182
param.query_type = query_type;
182183
param.num_rows = num_rows;
183184
param.roaring = std::make_shared<roaring::Roaring>();

be/src/exprs/function/match.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#include "runtime/runtime_state.h"
2424
#include "storage/index/index_reader_helper.h"
2525
#include "storage/index/inverted/analyzer/analyzer.h"
26+
#include "storage/index/inverted/inverted_index_query_param.h"
2627
#include "util/debug_points.h"
2728

2829
namespace doris {
@@ -79,14 +80,14 @@ Status FunctionMatchBase::evaluate_inverted_index(
7980
return Status::Error<ErrorCode::INDEX_INVALID_PARAMETERS>(
8081
"arguments for match must be string");
8182
}
82-
std::unique_ptr<InvertedIndexQueryParamFactory> query_param = nullptr;
83+
std::unique_ptr<InvertedIndexQueryParam> query_param = nullptr;
8384
RETURN_IF_ERROR(InvertedIndexQueryParamFactory::create_query_value(param_type, &param_value,
8485
query_param));
8586

8687
InvertedIndexParam param;
8788
param.column_name = data_type_with_name.first;
8889
param.column_type = data_type_with_name.second;
89-
param.query_value = query_param->get_value();
90+
param.query_value = std::move(query_param);
9091
param.query_type = get_query_type_from_fn_name();
9192
param.num_rows = num_rows;
9293
param.roaring = std::make_shared<roaring::Roaring>();

be/src/storage/field.h

Lines changed: 0 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -64,10 +64,6 @@ class StorageField {
6464
const std::string& name() const { return _name; }
6565
const PathInDataPtr& path() const { return _path; }
6666

67-
virtual void set_to_max(char* buf) const { return _type_info->set_to_max(buf); }
68-
69-
virtual void set_to_min(char* buf) const { return _type_info->set_to_min(buf); }
70-
7167
virtual StorageField* clone() const {
7268
auto* local = new StorageField(_desc);
7369
this->clone(local);
@@ -172,12 +168,6 @@ class CharField : public StorageField {
172168
StorageField::clone(local);
173169
return local;
174170
}
175-
176-
void set_to_max(char* ch) const override {
177-
auto slice = reinterpret_cast<Slice*>(ch);
178-
slice->size = _length;
179-
memset(slice->data, 0xFF, slice->size);
180-
}
181171
};
182172

183173
class VarcharField : public StorageField {
@@ -189,12 +179,6 @@ class VarcharField : public StorageField {
189179
StorageField::clone(local);
190180
return local;
191181
}
192-
193-
void set_to_max(char* ch) const override {
194-
auto slice = reinterpret_cast<Slice*>(ch);
195-
slice->size = _length - OLAP_VARCHAR_MAX_BYTES;
196-
memset(slice->data, 0xFF, slice->size);
197-
}
198182
};
199183
class StringField : public StorageField {
200184
public:
@@ -205,11 +189,6 @@ class StringField : public StorageField {
205189
StorageField::clone(local);
206190
return local;
207191
}
208-
209-
void set_to_max(char* ch) const override {
210-
auto slice = reinterpret_cast<Slice*>(ch);
211-
memset(slice->data, 0xFF, slice->size);
212-
}
213192
};
214193

215194
class BitmapAggField : public StorageField {

be/src/storage/index/inverted/inverted_index_iterator.cpp

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -88,8 +88,8 @@ Status InvertedIndexIterator::read_from_index(const IndexParam& param) {
8888
runtime_state->query_options().inverted_index_skip_threshold;
8989
size_t hit_count = 0;
9090
RETURN_IF_ERROR(try_read_from_inverted_index(reader, i_param->column_name,
91-
i_param->query_value, i_param->query_type,
92-
&hit_count));
91+
i_param->query_value.get(),
92+
i_param->query_type, &hit_count));
9393
if (hit_count > i_param->num_rows * query_bkd_limit_percent / 100) {
9494
return Status::Error<ErrorCode::INVERTED_INDEX_BYPASS>(
9595
"hit count: {}, bkd inverted reached limit {}% , segment num "
@@ -101,7 +101,7 @@ Status InvertedIndexIterator::read_from_index(const IndexParam& param) {
101101

102102
// Note: analyzer_ctx is now passed via i_param->analyzer_ctx
103103
auto execute_query = [&]() {
104-
return reader->query(_context, i_param->column_name, i_param->query_value,
104+
return reader->query(_context, i_param->column_name, i_param->query_value.get(),
105105
i_param->query_type, i_param->roaring, i_param->analyzer_ctx);
106106
};
107107

@@ -133,11 +133,10 @@ Result<bool> InvertedIndexIterator::has_null() {
133133
return reader->has_null();
134134
}
135135

136-
Status InvertedIndexIterator::try_read_from_inverted_index(const InvertedIndexReaderPtr& reader,
137-
const std::string& column_name,
138-
const void* query_value,
139-
InvertedIndexQueryType query_type,
140-
size_t* count) {
136+
Status InvertedIndexIterator::try_read_from_inverted_index(
137+
const InvertedIndexReaderPtr& reader, const std::string& column_name,
138+
const InvertedIndexQueryParam* query_value, InvertedIndexQueryType query_type,
139+
size_t* count) {
141140
// NOTE: only bkd index support try read now.
142141
if (query_type == InvertedIndexQueryType::GREATER_EQUAL_QUERY ||
143142
query_type == InvertedIndexQueryType::GREATER_THAN_QUERY ||

0 commit comments

Comments
 (0)