Skip to content

Commit 3238392

Browse files
authored
chore: pin duckdb 4a7aee9 and fix extension compatibility (#148)
1 parent 9023842 commit 3238392

14 files changed

Lines changed: 97 additions & 83 deletions

duckdb

Submodule duckdb updated 3417 files

src/include/lance_scan_bind_data.hpp

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@
88
#include <cstdint>
99

1010
namespace duckdb {
11+
class TableCatalogEntry;
1112

1213
struct LanceScanBindData : public TableFunctionData {
1314
string file_path;
@@ -21,6 +22,7 @@ struct LanceScanBindData : public TableFunctionData {
2122
vector<LogicalType> types;
2223
vector<string> lance_pushed_filter_ir_parts;
2324
vector<string> duckdb_pushed_filter_sql_parts;
25+
optional_ptr<TableCatalogEntry> table_entry = nullptr;
2426

2527
bool sampling_pushed_down = false;
2628
double sample_percentage = 0.0;

src/lance_delete.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -157,8 +157,8 @@ class PhysicalLanceDelete final : public PhysicalOperator {
157157
return make_uniq<LanceDeleteSourceState>();
158158
}
159159

160-
SourceResultType GetData(ExecutionContext &context, DataChunk &chunk,
161-
OperatorSourceInput &input) const override {
160+
SourceResultType GetDataInternal(ExecutionContext &context, DataChunk &chunk,
161+
OperatorSourceInput &input) const override {
162162
auto &state = input.global_state.Cast<LanceDeleteSourceState>();
163163
if (state.emitted) {
164164
return SourceResultType::FINISHED;

src/lance_extension.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@ void RegisterLanceSearch(ExtensionLoader &loader);
1818
void RegisterLanceReplacement(DBConfig &config);
1919
void RegisterLanceWrite(ExtensionLoader &loader);
2020
void RegisterLanceStorage(DBConfig &config);
21-
void RegisterLanceTruncate(DBConfig &config);
21+
void RegisterLanceTruncate(DBConfig &config, ExtensionLoader &loader);
2222
void RegisterLanceIndex(DBConfig &config, ExtensionLoader &loader);
2323
void RegisterLanceScanOptimizer(DBConfig &config);
2424

@@ -40,7 +40,7 @@ void LanceExtension::Load(ExtensionLoader &loader) {
4040
RegisterLanceScanOptimizer(config);
4141
RegisterLanceStorage(config);
4242
RegisterLanceReplacement(config);
43-
RegisterLanceTruncate(config);
43+
RegisterLanceTruncate(config, loader);
4444
RegisterLanceIndex(config, loader);
4545
}
4646

src/lance_index.cpp

Lines changed: 7 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -574,10 +574,8 @@ LanceIndexListBind(ClientContext &context, TableFunctionBindInput &input,
574574
LanceFormatErrorSuffix());
575575
}
576576
lance_free_schema(schema_handle);
577-
578-
auto &config = DBConfig::GetConfig(context);
579577
ArrowTableFunction::PopulateArrowTableSchema(
580-
config, result->arrow_table, result->schema_root.arrow_schema);
578+
context, result->arrow_table, result->schema_root.arrow_schema);
581579
result->names = result->arrow_table.GetNames();
582580
result->types = result->arrow_table.GetTypes();
583581
names = result->names;
@@ -645,10 +643,8 @@ LanceIndexListTableBind(ClientContext &context, TableFunctionBindInput &input,
645643
LanceFormatErrorSuffix());
646644
}
647645
lance_free_schema(schema_handle);
648-
649-
auto &config = DBConfig::GetConfig(context);
650646
ArrowTableFunction::PopulateArrowTableSchema(
651-
config, result->arrow_table, result->schema_root.arrow_schema);
647+
context, result->arrow_table, result->schema_root.arrow_schema);
652648
result->names = result->arrow_table.GetNames();
653649
result->types = result->arrow_table.GetTypes();
654650
names = result->names;
@@ -749,13 +745,13 @@ static void LanceIndexListFunc(ClientContext &context, TableFunctionInput &data,
749745
auto remaining = NumericCast<idx_t>(local_state.chunk->arrow_array.length) -
750746
local_state.chunk_offset;
751747
auto output_size = MinValue<idx_t>(STANDARD_VECTOR_SIZE, remaining);
752-
auto start = global_state.lines_read.fetch_add(output_size);
748+
global_state.lines_read.fetch_add(output_size);
753749

754750
output.SetCardinality(output_size);
755751
// The Lance index list stream always returns all columns (no projection
756752
// pushdown), so we must map DuckDB projection column ids to Arrow children.
757753
ArrowTableFunction::ArrowToDuckDB(
758-
local_state, bind_data.arrow_table.GetColumns(), output, start,
754+
local_state, bind_data.arrow_table.GetColumns(), output,
759755
/*arrow_scan_is_projected=*/false);
760756
local_state.chunk_offset += output_size;
761757

@@ -1178,8 +1174,8 @@ class PhysicalLanceCreateIndex final : public PhysicalOperator {
11781174
params_json(std::move(params_json_p)), replace(replace_p),
11791175
train(train_p) {}
11801176

1181-
SourceResultType GetData(ExecutionContext &context, DataChunk &chunk,
1182-
OperatorSourceInput &input) const override {
1177+
SourceResultType GetDataInternal(ExecutionContext &context, DataChunk &chunk,
1178+
OperatorSourceInput &input) const override {
11831179
(void)input;
11841180
auto &client_context = context.client;
11851181

@@ -1653,7 +1649,7 @@ void RegisterLanceIndex(DBConfig &config, ExtensionLoader &loader) {
16531649
extension.parse_function = LanceIndexParse;
16541650
extension.plan_function = LanceIndexPlan;
16551651
extension.parser_info = make_shared_ptr<ParserExtensionInfo>();
1656-
config.parser_extensions.push_back(std::move(extension));
1652+
ParserExtension::Register(config, std::move(extension));
16571653

16581654
// Register DuckDB index types that should route to Lance index DDL when used
16591655
// on tables in ATTACH TYPE LANCE namespaces.

src/lance_insert.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -179,8 +179,8 @@ class PhysicalLanceInsert final : public PhysicalOperator {
179179
return make_uniq<LanceInsertSourceState>();
180180
}
181181

182-
SourceResultType GetData(ExecutionContext &, DataChunk &chunk,
183-
OperatorSourceInput &input) const override {
182+
SourceResultType GetDataInternal(ExecutionContext &, DataChunk &chunk,
183+
OperatorSourceInput &input) const override {
184184
auto &state = input.global_state.Cast<LanceInsertSourceState>();
185185
if (state.emitted) {
186186
return SourceResultType::FINISHED;

src/lance_scan.cpp

Lines changed: 35 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -781,10 +781,8 @@ static unique_ptr<FunctionData> LanceScanBind(ClientContext &context,
781781
LanceFormatErrorSuffix());
782782
}
783783
lance_free_schema(schema_handle);
784-
785-
auto &config = DBConfig::GetConfig(context);
786784
ArrowTableFunction::PopulateArrowTableSchema(
787-
config, result->arrow_table, result->schema_root.arrow_schema);
785+
context, result->arrow_table, result->schema_root.arrow_schema);
788786
result->names = result->arrow_table.GetNames();
789787
result->types = result->arrow_table.GetTypes();
790788

@@ -804,7 +802,7 @@ static unique_ptr<FunctionData> LanceScanBind(ClientContext &context,
804802
}
805803
lance_free_schema(scan_schema_handle);
806804
ArrowTableFunction::PopulateArrowTableSchema(
807-
config, result->scan_arrow_table, result->scan_schema_root.arrow_schema);
805+
context, result->scan_arrow_table, result->scan_schema_root.arrow_schema);
808806
names = result->names;
809807
return_types = result->types;
810808
return std::move(result);
@@ -875,10 +873,8 @@ LanceNamespaceScanBind(ClientContext &context, TableFunctionBindInput &input,
875873
LanceFormatErrorSuffix());
876874
}
877875
lance_free_schema(schema_handle);
878-
879-
auto &config = DBConfig::GetConfig(context);
880876
ArrowTableFunction::PopulateArrowTableSchema(
881-
config, result->arrow_table, result->schema_root.arrow_schema);
877+
context, result->arrow_table, result->schema_root.arrow_schema);
882878
result->names = result->arrow_table.GetNames();
883879
result->types = result->arrow_table.GetTypes();
884880

@@ -899,7 +895,7 @@ LanceNamespaceScanBind(ClientContext &context, TableFunctionBindInput &input,
899895
}
900896
lance_free_schema(scan_schema_handle);
901897
ArrowTableFunction::PopulateArrowTableSchema(
902-
config, result->scan_arrow_table, result->scan_schema_root.arrow_schema);
898+
context, result->scan_arrow_table, result->scan_schema_root.arrow_schema);
903899
names = result->names;
904900
return_types = result->types;
905901
return std::move(result);
@@ -1387,12 +1383,12 @@ static void LanceScanFunc(ClientContext &context, TableFunctionInput &data,
13871383
auto remaining = NumericCast<idx_t>(local_state.chunk->arrow_array.length) -
13881384
local_state.chunk_offset;
13891385
auto output_size = MinValue<idx_t>(STANDARD_VECTOR_SIZE, remaining);
1390-
auto start = global_state.lines_read.fetch_add(output_size);
1386+
global_state.lines_read.fetch_add(output_size);
13911387

13921388
local_state.scan_converted.Reset();
13931389
local_state.scan_converted.SetCardinality(output_size);
13941390
ArrowTableFunction::ArrowToDuckDB(local_state, arrow_columns,
1395-
local_state.scan_converted, start);
1391+
local_state.scan_converted);
13961392

13971393
auto fill_output_from_converted = [&](DataChunk &target) {
13981394
if (target.ColumnCount() !=
@@ -1579,6 +1575,12 @@ LanceScanDynamicToString(TableFunctionDynamicToStringInput &input) {
15791575
return result;
15801576
}
15811577

1578+
static idx_t LanceScanRowsScanned(GlobalTableFunctionState &global_state,
1579+
LocalTableFunctionState &) {
1580+
auto &scan_state = global_state.Cast<LanceScanGlobalState>();
1581+
return scan_state.lines_read.load();
1582+
}
1583+
15821584
static bool TryParseConstantLimitOffset(const LogicalLimit &limit_op,
15831585
optional_idx &out_limit,
15841586
idx_t &out_offset) {
@@ -2137,10 +2139,8 @@ LanceExecPushdown(ClientContext &context, Optimizer &optimizer,
21372139
LanceFormatErrorSuffix());
21382140
}
21392141
lance_free_schema(schema_handle);
2140-
2141-
auto &config = DBConfig::GetConfig(context);
21422142
ArrowTableFunction::PopulateArrowTableSchema(
2143-
config, exec_bind->arrow_table, exec_bind->schema_root.arrow_schema);
2143+
context, exec_bind->arrow_table, exec_bind->schema_root.arrow_schema);
21442144
exec_names = exec_bind->arrow_table.GetNames();
21452145
exec_types = exec_bind->arrow_table.GetTypes();
21462146
} catch (...) {
@@ -2360,23 +2360,23 @@ static void LanceCardinalityFixupOptimizer(OptimizerExtensionInput &input,
23602360
void RegisterLanceScanOptimizer(DBConfig &config) {
23612361
OptimizerExtension exec_ext;
23622362
exec_ext.optimize_function = LanceExecPushdownOptimizer;
2363-
config.optimizer_extensions.push_back(std::move(exec_ext));
2363+
OptimizerExtension::Register(config, std::move(exec_ext));
23642364

23652365
OptimizerExtension rowid_take_ext;
23662366
rowid_take_ext.optimize_function = LanceRowIdInRewriteOptimizer;
2367-
config.optimizer_extensions.push_back(std::move(rowid_take_ext));
2367+
OptimizerExtension::Register(config, std::move(rowid_take_ext));
23682368

23692369
OptimizerExtension like_ext;
23702370
like_ext.optimize_function = LanceLikePushdownOptimizer;
2371-
config.optimizer_extensions.push_back(std::move(like_ext));
2371+
OptimizerExtension::Register(config, std::move(like_ext));
23722372

23732373
OptimizerExtension limit_ext;
23742374
limit_ext.optimize_function = LanceLimitOffsetPushdownOptimizer;
2375-
config.optimizer_extensions.push_back(std::move(limit_ext));
2375+
OptimizerExtension::Register(config, std::move(limit_ext));
23762376

23772377
OptimizerExtension cardinality_ext;
23782378
cardinality_ext.optimize_function = LanceCardinalityFixupOptimizer;
2379-
config.optimizer_extensions.push_back(std::move(cardinality_ext));
2379+
OptimizerExtension::Register(config, std::move(cardinality_ext));
23802380
}
23812381

23822382
// ---- __lance_exec (internal-only) ----
@@ -2447,10 +2447,8 @@ static unique_ptr<FunctionData> LanceExecBind(ClientContext &context,
24472447
LanceFormatErrorSuffix());
24482448
}
24492449
lance_free_schema(schema_handle);
2450-
2451-
auto &config = DBConfig::GetConfig(context);
24522450
ArrowTableFunction::PopulateArrowTableSchema(
2453-
config, result->arrow_table, result->schema_root.arrow_schema);
2451+
context, result->arrow_table, result->schema_root.arrow_schema);
24542452
result->names = result->arrow_table.GetNames();
24552453
result->types = result->arrow_table.GetTypes();
24562454

@@ -2555,11 +2553,11 @@ static void LanceExecFunc(ClientContext &context, TableFunctionInput &data,
25552553
auto remaining = NumericCast<idx_t>(local_state.chunk->arrow_array.length) -
25562554
local_state.chunk_offset;
25572555
auto output_size = MinValue<idx_t>(STANDARD_VECTOR_SIZE, remaining);
2558-
auto start = global_state.lines_read.fetch_add(output_size);
2556+
global_state.lines_read.fetch_add(output_size);
25592557

25602558
output.SetCardinality(output_size);
25612559
ArrowTableFunction::ArrowToDuckDB(
2562-
local_state, bind_data.arrow_table.GetColumns(), output, start, false);
2560+
local_state, bind_data.arrow_table.GetColumns(), output, false);
25632561
local_state.chunk_offset += output_size;
25642562

25652563
if (output.size() == 0) {
@@ -2619,6 +2617,14 @@ static TableFunction LanceTableScanFunction() {
26192617
function.get_virtual_columns = LanceGetVirtualColumns;
26202618
function.to_string = LanceScanToString;
26212619
function.dynamic_to_string = LanceScanDynamicToString;
2620+
function.rows_scanned = LanceScanRowsScanned;
2621+
function.get_bind_info = [](const optional_ptr<FunctionData> bind_data) {
2622+
auto *scan_bind = dynamic_cast<const LanceScanBindData *>(bind_data.get());
2623+
if (scan_bind && scan_bind->table_entry) {
2624+
return BindInfo(const_cast<TableCatalogEntry &>(*scan_bind->table_entry));
2625+
}
2626+
return BindInfo(ScanType::EXTERNAL);
2627+
};
26222628
function.init_global = LanceScanInitGlobal;
26232629
function.init_local = LanceScanLocalInit;
26242630
return function;
@@ -2678,9 +2684,8 @@ static void PopulateLanceTableSchemaFromDataset(
26782684
}
26792685
lance_free_schema(schema_handle);
26802686

2681-
auto &config = DBConfig::GetConfig(context);
26822687
ArrowTableSchema arrow_table;
2683-
ArrowTableFunction::PopulateArrowTableSchema(config, arrow_table,
2688+
ArrowTableFunction::PopulateArrowTableSchema(context, arrow_table,
26842689
schema_root.arrow_schema);
26852690
const auto names = arrow_table.GetNames();
26862691
const auto types = arrow_table.GetTypes();
@@ -3037,6 +3042,7 @@ TableFunction
30373042
LanceTableEntry::GetScanFunction(ClientContext &context,
30383043
unique_ptr<FunctionData> &bind_data) {
30393044
auto result = make_uniq<LanceScanBindData>();
3045+
result->table_entry = this;
30403046
result->file_path = dataset_uri;
30413047

30423048
string display_uri;
@@ -3063,10 +3069,8 @@ LanceTableEntry::GetScanFunction(ClientContext &context,
30633069
LanceFormatErrorSuffix());
30643070
}
30653071
lance_free_schema(schema_handle);
3066-
3067-
auto &config = DBConfig::GetConfig(context);
30683072
ArrowTableFunction::PopulateArrowTableSchema(
3069-
config, result->arrow_table, result->schema_root.arrow_schema);
3073+
context, result->arrow_table, result->schema_root.arrow_schema);
30703074
result->names = result->arrow_table.GetNames();
30713075
result->types = result->arrow_table.GetTypes();
30723076

@@ -3086,7 +3090,7 @@ LanceTableEntry::GetScanFunction(ClientContext &context,
30863090
}
30873091
lance_free_schema(scan_schema_handle);
30883092
ArrowTableFunction::PopulateArrowTableSchema(
3089-
config, result->scan_arrow_table, result->scan_schema_root.arrow_schema);
3093+
context, result->scan_arrow_table, result->scan_schema_root.arrow_schema);
30903094

30913095
bind_data = std::move(result);
30923096
return LanceTableScanFunction();
@@ -3111,6 +3115,7 @@ void RegisterLanceScan(ExtensionLoader &loader) {
31113115
internal_scan.get_virtual_columns = LanceGetVirtualColumns;
31123116
internal_scan.to_string = LanceScanToString;
31133117
internal_scan.dynamic_to_string = LanceScanDynamicToString;
3118+
internal_scan.rows_scanned = LanceScanRowsScanned;
31143119

31153120
CreateTableFunctionInfo scan_info(std::move(internal_scan));
31163121
scan_info.internal = true;
@@ -3141,6 +3146,7 @@ void RegisterLanceScan(ExtensionLoader &loader) {
31413146
internal_namespace_scan.get_virtual_columns = LanceGetVirtualColumns;
31423147
internal_namespace_scan.to_string = LanceScanToString;
31433148
internal_namespace_scan.dynamic_to_string = LanceScanDynamicToString;
3149+
internal_namespace_scan.rows_scanned = LanceScanRowsScanned;
31443150

31453151
CreateTableFunctionInfo internal_info(std::move(internal_namespace_scan));
31463152
internal_info.internal = true;

0 commit comments

Comments
 (0)