Skip to content

Commit da2b101

Browse files
committed
refactor: align cpp implementation with guideline rules
1 parent 7b74c68 commit da2b101

6 files changed

Lines changed: 95 additions & 67 deletions

File tree

src/include/lance_resolver.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ class LanceDatasetResolverRegistry {
7979

8080
//! Register a custom resolver
8181
//! Resolvers are sorted by priority (descending) after registration
82-
void RegisterResolver(shared_ptr<ILanceDatasetResolver> resolver);
82+
void RegisterResolver(unique_ptr<ILanceDatasetResolver> resolver);
8383

8484
//! Unregister a resolver by name
8585
//! Returns true if a resolver was removed
@@ -111,7 +111,7 @@ class LanceDatasetResolverRegistry {
111111
void SortResolvers();
112112

113113
mutable mutex lock_;
114-
vector<shared_ptr<ILanceDatasetResolver>> resolvers_;
114+
vector<unique_ptr<ILanceDatasetResolver>> resolvers_;
115115
};
116116

117117
//===--------------------------------------------------------------------===//

src/lance_exec_ir.cpp

Lines changed: 31 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -41,15 +41,15 @@ static void WriteU8(string &out, uint8_t v) {
4141
}
4242

4343
static void WriteU32(string &out, uint32_t v) {
44-
for (int i = 0; i < 4; i++) {
45-
out.push_back(static_cast<char>((v >> (i * 8)) & 0xFF));
44+
for (idx_t byte_idx = 0; byte_idx < 4; byte_idx++) {
45+
out.push_back(static_cast<char>((v >> (byte_idx * 8)) & 0xFF));
4646
}
4747
}
4848

4949
static void WriteI64(string &out, int64_t v) {
5050
auto u = static_cast<uint64_t>(v);
51-
for (int i = 0; i < 8; i++) {
52-
out.push_back(static_cast<char>((u >> (i * 8)) & 0xFF));
51+
for (idx_t byte_idx = 0; byte_idx < 8; byte_idx++) {
52+
out.push_back(static_cast<char>((u >> (byte_idx * 8)) & 0xFF));
5353
}
5454
}
5555

@@ -58,33 +58,34 @@ static void WriteF64(string &out, double v) {
5858
"double must be 64-bit IEEE754");
5959
uint64_t bits;
6060
memcpy(&bits, &v, sizeof(bits));
61-
for (int i = 0; i < 8; i++) {
62-
out.push_back(static_cast<char>((bits >> (i * 8)) & 0xFF));
61+
for (idx_t byte_idx = 0; byte_idx < 8; byte_idx++) {
62+
out.push_back(static_cast<char>((bits >> (byte_idx * 8)) & 0xFF));
6363
}
6464
}
6565

66-
static void WriteBytes(string &out, const_data_ptr_t ptr, size_t len) {
67-
out.append(reinterpret_cast<const char *>(ptr), len);
66+
static void WriteBytes(string &out, const_data_ptr_t ptr, idx_t len) {
67+
out.append(reinterpret_cast<const char *>(ptr), NumericCast<size_t>(len));
6868
}
6969

7070
static void WriteHugeint(string &out, hugeint_t v) {
7171
uint64_t lower = v.lower;
7272
int64_t upper = v.upper;
73-
for (int i = 0; i < 8; i++) {
74-
out.push_back(static_cast<char>((lower >> (i * 8)) & 0xFF));
73+
for (idx_t byte_idx = 0; byte_idx < 8; byte_idx++) {
74+
out.push_back(static_cast<char>((lower >> (byte_idx * 8)) & 0xFF));
7575
}
7676
auto upper_u = static_cast<uint64_t>(upper);
77-
for (int i = 0; i < 8; i++) {
78-
out.push_back(static_cast<char>((upper_u >> (i * 8)) & 0xFF));
77+
for (idx_t byte_idx = 0; byte_idx < 8; byte_idx++) {
78+
out.push_back(static_cast<char>((upper_u >> (byte_idx * 8)) & 0xFF));
7979
}
8080
}
8181

8282
static void WriteString(string &out, const string &s) {
83-
if (s.size() > NumericCast<size_t>(NumericLimits<uint32_t>::Maximum())) {
83+
auto string_size = NumericCast<idx_t>(s.size());
84+
if (string_size > NumericCast<idx_t>(NumericLimits<uint32_t>::Maximum())) {
8485
throw InvalidInputException("ExecIR string too large");
8586
}
86-
WriteU32(out, NumericCast<uint32_t>(s.size()));
87-
WriteBytes(out, reinterpret_cast<const_data_ptr_t>(s.data()), s.size());
87+
WriteU32(out, NumericCast<uint32_t>(string_size));
88+
WriteBytes(out, reinterpret_cast<const_data_ptr_t>(s.data()), string_size);
8889
}
8990

9091
enum class ExecIrAggFunc : uint8_t {
@@ -884,25 +885,26 @@ bool TryEncodeLanceExecIRv1(
884885
WriteU32(out_exec_ir, 4); // version
885886
WriteU32(out_exec_ir, 0); // reserved flags
886887

887-
if (filter_ir_msg.size() >
888-
NumericCast<size_t>(NumericLimits<uint32_t>::Maximum())) {
888+
if (NumericCast<idx_t>(filter_ir_msg.size()) >
889+
NumericCast<idx_t>(NumericLimits<uint32_t>::Maximum())) {
889890
return false;
890891
}
891892
WriteU32(out_exec_ir, NumericCast<uint32_t>(filter_ir_msg.size()));
892893
WriteBytes(out_exec_ir,
893894
reinterpret_cast<const_data_ptr_t>(filter_ir_msg.data()),
894-
filter_ir_msg.size());
895+
NumericCast<idx_t>(filter_ir_msg.size()));
895896

896-
if (scan_projection.size() >
897-
NumericCast<size_t>(NumericLimits<uint32_t>::Maximum())) {
897+
if (NumericCast<idx_t>(scan_projection.size()) >
898+
NumericCast<idx_t>(NumericLimits<uint32_t>::Maximum())) {
898899
return false;
899900
}
900901
WriteU32(out_exec_ir, NumericCast<uint32_t>(scan_projection.size()));
901902
for (auto &name : scan_projection) {
902903
WriteString(out_exec_ir, name);
903904
}
904905

905-
if (groups.size() > NumericCast<size_t>(NumericLimits<uint32_t>::Maximum())) {
906+
if (NumericCast<idx_t>(groups.size()) >
907+
NumericCast<idx_t>(NumericLimits<uint32_t>::Maximum())) {
906908
return false;
907909
}
908910
WriteU32(out_exec_ir, NumericCast<uint32_t>(groups.size()));
@@ -911,16 +913,17 @@ bool TryEncodeLanceExecIRv1(
911913
if (!g.encoded_expr.empty()) {
912914
WriteBytes(out_exec_ir,
913915
reinterpret_cast<const_data_ptr_t>(g.encoded_expr.data()),
914-
g.encoded_expr.size());
916+
NumericCast<idx_t>(g.encoded_expr.size()));
915917
}
916918
if (!g.output_type_hint.empty()) {
917919
WriteBytes(out_exec_ir,
918920
reinterpret_cast<const_data_ptr_t>(g.output_type_hint.data()),
919-
g.output_type_hint.size());
921+
NumericCast<idx_t>(g.output_type_hint.size()));
920922
}
921923
}
922924

923-
if (aggs.size() > NumericCast<size_t>(NumericLimits<uint32_t>::Maximum())) {
925+
if (NumericCast<idx_t>(aggs.size()) >
926+
NumericCast<idx_t>(NumericLimits<uint32_t>::Maximum())) {
924927
return false;
925928
}
926929
WriteU32(out_exec_ir, NumericCast<uint32_t>(aggs.size()));
@@ -931,13 +934,13 @@ bool TryEncodeLanceExecIRv1(
931934
if (!agg.encoded_args.empty()) {
932935
WriteBytes(out_exec_ir,
933936
reinterpret_cast<const_data_ptr_t>(agg.encoded_args.data()),
934-
agg.encoded_args.size());
937+
NumericCast<idx_t>(agg.encoded_args.size()));
935938
}
936939
if (!agg.output_type_hint.empty()) {
937940
WriteBytes(
938941
out_exec_ir,
939942
reinterpret_cast<const_data_ptr_t>(agg.output_type_hint.data()),
940-
agg.output_type_hint.size());
943+
NumericCast<idx_t>(agg.output_type_hint.size()));
941944
}
942945
}
943946

@@ -1105,8 +1108,8 @@ bool TryEncodeLanceExecIRv1(
11051108
}
11061109
}
11071110

1108-
if (order_by.size() >
1109-
NumericCast<size_t>(NumericLimits<uint32_t>::Maximum())) {
1111+
if (NumericCast<idx_t>(order_by.size()) >
1112+
NumericCast<idx_t>(NumericLimits<uint32_t>::Maximum())) {
11101113
return false;
11111114
}
11121115
WriteU32(out_exec_ir, NumericCast<uint32_t>(order_by.size()));

src/lance_resolver.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -48,11 +48,11 @@ LanceDatasetResolverRegistry &LanceDatasetResolverRegistry::Get() {
4848

4949
LanceDatasetResolverRegistry::LanceDatasetResolverRegistry() {
5050
// Register default resolver
51-
resolvers_.push_back(make_shared_ptr<DefaultCatalogResolver>());
51+
resolvers_.push_back(make_uniq<DefaultCatalogResolver>());
5252
}
5353

5454
void LanceDatasetResolverRegistry::RegisterResolver(
55-
shared_ptr<ILanceDatasetResolver> resolver) {
55+
unique_ptr<ILanceDatasetResolver> resolver) {
5656
if (!resolver) {
5757
return;
5858
}
@@ -78,7 +78,7 @@ bool LanceDatasetResolverRegistry::UnregisterResolver(const string &name) {
7878
lock_guard<mutex> guard(lock_);
7979

8080
auto it = std::remove_if(resolvers_.begin(), resolvers_.end(),
81-
[&name](const shared_ptr<ILanceDatasetResolver> &r) {
81+
[&name](const unique_ptr<ILanceDatasetResolver> &r) {
8282
return r->Name() == name;
8383
});
8484

@@ -102,8 +102,8 @@ vector<string> LanceDatasetResolverRegistry::GetResolverNames() const {
102102
void LanceDatasetResolverRegistry::SortResolvers() {
103103
// Sort by priority (descending) - higher priority runs first
104104
std::sort(resolvers_.begin(), resolvers_.end(),
105-
[](const shared_ptr<ILanceDatasetResolver> &a,
106-
const shared_ptr<ILanceDatasetResolver> &b) {
105+
[](const unique_ptr<ILanceDatasetResolver> &a,
106+
const unique_ptr<ILanceDatasetResolver> &b) {
107107
return a->Priority() > b->Priority();
108108
});
109109
}

src/lance_scan.cpp

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -250,10 +250,10 @@ static bool TryLanceExplainDatasetScan(void *dataset,
250250
}
251251

252252
const uint8_t *filter_ptr = nullptr;
253-
size_t filter_len = 0;
253+
idx_t filter_len = 0;
254254
if (filter_ir && !filter_ir->empty()) {
255255
filter_ptr = reinterpret_cast<const uint8_t *>(filter_ir->data()); // NOLINT
256-
filter_len = filter_ir->size();
256+
filter_len = NumericCast<idx_t>(filter_ir->size());
257257
}
258258

259259
auto limit_i64 = pushed_limit.IsValid()
@@ -263,7 +263,8 @@ static bool TryLanceExplainDatasetScan(void *dataset,
263263

264264
auto *plan_ptr = lance_explain_dataset_scan_ir(
265265
dataset, col_ptrs.empty() ? nullptr : col_ptrs.data(), col_ptrs.size(),
266-
filter_ptr, filter_len, limit_i64, offset_i64, verbose ? 1 : 0);
266+
filter_ptr, NumericCast<size_t>(filter_len), limit_i64, offset_i64,
267+
verbose ? 1 : 0);
267268
if (!plan_ptr) {
268269
out_error = LanceConsumeLastError();
269270
if (out_error.empty()) {
@@ -1122,15 +1123,17 @@ LanceScanInitGlobal(ClientContext &context, TableFunctionInitInput &input) {
11221123
return state;
11231124
}
11241125

1125-
size_t fragment_count = 0;
1126+
size_t ffi_fragment_count = 0;
11261127
auto fragments_ptr =
1127-
lance_dataset_list_fragments(bind_data.dataset, &fragment_count);
1128+
lance_dataset_list_fragments(bind_data.dataset, &ffi_fragment_count);
11281129
if (!fragments_ptr) {
11291130
throw IOException("Failed to list Lance fragments" +
11301131
LanceFormatErrorSuffix());
11311132
}
1132-
scan_state.fragment_ids.assign(fragments_ptr, fragments_ptr + fragment_count);
1133-
lance_free_fragment_list(fragments_ptr, fragment_count);
1133+
auto fragment_count = NumericCast<idx_t>(ffi_fragment_count);
1134+
scan_state.fragment_ids.assign(
1135+
fragments_ptr, fragments_ptr + NumericCast<size_t>(fragment_count));
1136+
lance_free_fragment_list(fragments_ptr, ffi_fragment_count);
11341137

11351138
auto threads = context.db->NumberOfThreads();
11361139
scan_state.max_threads = MaxValue<idx_t>(

src/lance_search.cpp

Lines changed: 30 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -57,16 +57,16 @@ static bool TryLanceExplainKnn(void *dataset, const string &vector_column,
5757
}
5858

5959
const uint8_t *filter_ptr = nullptr;
60-
size_t filter_len = 0;
60+
idx_t filter_len = 0;
6161
if (filter_ir && !filter_ir->empty()) {
6262
filter_ptr = reinterpret_cast<const uint8_t *>(filter_ir->data());
63-
filter_len = filter_ir->size();
63+
filter_len = NumericCast<idx_t>(filter_ir->size());
6464
}
6565

6666
auto *plan_ptr = lance_explain_knn_scan_ir(
6767
dataset, vector_column.c_str(), query.data(), query.size(), k, nprobes,
68-
refine_factor, filter_ptr, filter_len, prefilter ? 1 : 0,
69-
use_index ? 1 : 0, verbose ? 1 : 0);
68+
refine_factor, filter_ptr, NumericCast<size_t>(filter_len),
69+
prefilter ? 1 : 0, use_index ? 1 : 0, verbose ? 1 : 0);
7070
if (!plan_ptr) {
7171
out_error = LanceConsumeLastError();
7272
if (out_error.empty()) {
@@ -82,15 +82,29 @@ static bool TryLanceExplainKnn(void *dataset, const string &vector_column,
8282

8383
static LanceTableEntry *TryResolveLanceTableEntry(ClientContext &context,
8484
const string &input) {
85+
if (input.empty() || input.find('/') != string::npos ||
86+
input.find('\\') != string::npos || input.find("://") != string::npos) {
87+
return nullptr;
88+
}
89+
90+
QualifiedName qname;
8591
try {
86-
auto qname = QualifiedName::Parse(input);
87-
auto &entry = Catalog::GetEntry(context, CatalogType::TABLE_ENTRY,
88-
qname.catalog, qname.schema, qname.name);
89-
auto &table_entry = entry.Cast<TableCatalogEntry>();
90-
return dynamic_cast<LanceTableEntry *>(&table_entry);
91-
} catch (Exception &) {
92+
qname = QualifiedName::Parse(input);
93+
} catch (ParserException &) {
94+
return nullptr;
95+
}
96+
97+
EntryLookupInfo lookup_info(CatalogType::TABLE_ENTRY, qname.name);
98+
auto entry = Catalog::GetEntry(context, qname.catalog, qname.schema,
99+
lookup_info, OnEntryNotFound::RETURN_NULL);
100+
if (!entry) {
101+
return nullptr;
102+
}
103+
auto *table_entry = dynamic_cast<TableCatalogEntry *>(entry.get());
104+
if (!table_entry) {
92105
return nullptr;
93106
}
107+
return dynamic_cast<LanceTableEntry *>(table_entry);
94108
}
95109

96110
static void *OpenSearchDataset(ClientContext &context, const Value &input,
@@ -848,20 +862,21 @@ static bool LanceSearchLoadNextBatch(LanceSearchLocalState &local_state,
848862
global.lance_filter_ir.empty()
849863
? nullptr
850864
: reinterpret_cast<const uint8_t *>(global.lance_filter_ir.data());
851-
auto filter_ir_len = global.lance_filter_ir.size();
865+
auto filter_ir_len = NumericCast<idx_t>(global.lance_filter_ir.size());
852866

853-
auto create_stream = [&](const uint8_t *ir, size_t ir_len) -> void * {
867+
auto create_stream = [&](const uint8_t *ir, idx_t ir_len) -> void * {
854868
if (bind_data.mode == LanceSearchMode::Fts) {
855869
return lance_create_fts_stream_ir(
856870
bind_data.dataset, bind_data.text_column.c_str(),
857-
bind_data.query.c_str(), bind_data.k, ir, ir_len,
858-
bind_data.prefilter ? 1 : 0);
871+
bind_data.query.c_str(), bind_data.k, ir,
872+
NumericCast<size_t>(ir_len), bind_data.prefilter ? 1 : 0);
859873
}
860874
return lance_create_hybrid_stream_ir(
861875
bind_data.dataset, bind_data.vector_column.c_str(),
862876
bind_data.vector_query.data(), bind_data.vector_query.size(),
863877
bind_data.text_column.c_str(), bind_data.text_query.c_str(),
864-
bind_data.k, ir, ir_len, bind_data.prefilter ? 1 : 0, bind_data.alpha,
878+
bind_data.k, ir, NumericCast<size_t>(ir_len),
879+
bind_data.prefilter ? 1 : 0, bind_data.alpha,
865880
bind_data.oversample_factor);
866881
};
867882

src/lance_storage.cpp

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -102,15 +102,21 @@ static string GetLanceNamespaceHeaders(const AttachInfo &info) {
102102
kv.second.DefaultCastAs(LogicalType::VARCHAR).GetValue<string>();
103103
// Split by semicolon to support multiple headers
104104
vector<string> header_parts;
105-
size_t pos = 0;
106-
while (pos < header_str.size()) {
107-
auto next_semi = header_str.find(';', pos);
105+
idx_t pos = 0;
106+
auto header_str_size = NumericCast<idx_t>(header_str.size());
107+
while (pos < header_str_size) {
108+
auto next_semi =
109+
header_str.find(';', NumericCast<string::size_type>(pos));
108110
if (next_semi == string::npos) {
109-
header_parts.push_back(header_str.substr(pos));
111+
header_parts.push_back(
112+
header_str.substr(NumericCast<string::size_type>(pos)));
110113
break;
111114
}
112-
header_parts.push_back(header_str.substr(pos, next_semi - pos));
113-
pos = next_semi + 1;
115+
auto next_semi_idx = NumericCast<idx_t>(next_semi);
116+
header_parts.push_back(header_str.substr(
117+
NumericCast<string::size_type>(pos),
118+
NumericCast<string::size_type>(next_semi_idx - pos)));
119+
pos = next_semi_idx + 1;
114120
}
115121
for (auto &part : header_parts) {
116122
// Trim whitespace
@@ -1607,8 +1613,8 @@ class LanceTransactionManager final : public DuckTransactionManager {
16071613
}
16081614
}
16091615

1610-
for (idx_t i = 0; i < appends.size(); i++) {
1611-
auto &pending = appends[i];
1616+
for (idx_t append_idx = 0; append_idx < appends.size(); append_idx++) {
1617+
auto &pending = appends[append_idx];
16121618
vector<const char *> key_ptrs;
16131619
vector<const char *> value_ptrs;
16141620
BuildStorageOptionPointerArrays(
@@ -1622,8 +1628,9 @@ class LanceTransactionManager final : public DuckTransactionManager {
16221628
// Best-effort cleanup of any remaining pending transactions.
16231629
// Note: the transaction pointer is consumed by the commit call, even on
16241630
// error.
1625-
for (idx_t k = i + 1; k < appends.size(); k++) {
1626-
lance_free_transaction(appends[k].transaction);
1631+
for (idx_t cleanup_idx = append_idx + 1; cleanup_idx < appends.size();
1632+
cleanup_idx++) {
1633+
lance_free_transaction(appends[cleanup_idx].transaction);
16271634
}
16281635
DuckTransactionManager::RollbackTransaction(transaction_p);
16291636
return ErrorData(ExceptionType::TRANSACTION,

0 commit comments

Comments
 (0)