Skip to content

Commit 603807b

Browse files
duckdblabs-bothrl20
authored andcommitted
Update vendored DuckDB sources to 44e807a
1 parent c996d56 commit 603807b

File tree

50 files changed

+815
-210
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

50 files changed

+815
-210
lines changed

src/duckdb/src/common/arrow/arrow_type_extension.cpp

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,11 @@ ArrowTypeExtension::ArrowTypeExtension(string extension_name, string arrow_forma
1414
: extension_metadata(std::move(extension_name), {}, {}, std::move(arrow_format)), type_extension(std::move(type)) {
1515
}
1616

17+
ArrowTypeExtension::ArrowTypeExtension(ArrowExtensionMetadata &extension_metadata, unique_ptr<ArrowType> type)
18+
: extension_metadata(extension_metadata) {
19+
type_extension = make_shared_ptr<ArrowTypeExtensionData>(type->GetDuckType());
20+
}
21+
1722
ArrowExtensionMetadata::ArrowExtensionMetadata(string extension_name, string vendor_name, string type_name,
1823
string arrow_format)
1924
: extension_name(std::move(extension_name)), vendor_name(std::move(vendor_name)), type_name(std::move(type_name)),
@@ -197,10 +202,12 @@ ArrowTypeExtension GetArrowExtensionInternal(
197202
unordered_map<ArrowExtensionMetadata, ArrowTypeExtension, HashArrowTypeExtension> &type_extensions,
198203
ArrowExtensionMetadata info) {
199204
if (type_extensions.find(info) == type_extensions.end()) {
205+
auto og_info = info;
200206
info.SetArrowFormat("");
201207
if (type_extensions.find(info) == type_extensions.end()) {
202-
throw NotImplementedException("Arrow Extension with configuration:\n%s not yet registered",
203-
info.ToString());
208+
auto format = og_info.GetArrowFormat();
209+
auto type = ArrowType::GetTypeFromFormat(format);
210+
return ArrowTypeExtension(og_info, std::move(type));
204211
}
205212
}
206213
return type_extensions[info];

src/duckdb/src/common/arrow/schema_metadata.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ ArrowSchemaMetadata ArrowSchemaMetadata::NonCanonicalType(const string &type_nam
6666

6767
bool ArrowSchemaMetadata::HasExtension() const {
6868
auto arrow_extension = GetOption(ArrowSchemaMetadata::ARROW_EXTENSION_NAME);
69-
return !arrow_extension.empty() && !StringUtil::StartsWith(arrow_extension, "ogc");
69+
return !arrow_extension.empty();
7070
}
7171

7272
ArrowExtensionMetadata ArrowSchemaMetadata::GetExtensionInfo(string format) {

src/duckdb/src/common/enum_util.cpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4283,19 +4283,22 @@ const StringUtil::EnumStringLiteral *GetWindowBoundaryValues() {
42834283
{ static_cast<uint32_t>(WindowBoundary::EXPR_PRECEDING_ROWS), "EXPR_PRECEDING_ROWS" },
42844284
{ static_cast<uint32_t>(WindowBoundary::EXPR_FOLLOWING_ROWS), "EXPR_FOLLOWING_ROWS" },
42854285
{ static_cast<uint32_t>(WindowBoundary::EXPR_PRECEDING_RANGE), "EXPR_PRECEDING_RANGE" },
4286-
{ static_cast<uint32_t>(WindowBoundary::EXPR_FOLLOWING_RANGE), "EXPR_FOLLOWING_RANGE" }
4286+
{ static_cast<uint32_t>(WindowBoundary::EXPR_FOLLOWING_RANGE), "EXPR_FOLLOWING_RANGE" },
4287+
{ static_cast<uint32_t>(WindowBoundary::CURRENT_ROW_GROUPS), "CURRENT_ROW_GROUPS" },
4288+
{ static_cast<uint32_t>(WindowBoundary::EXPR_PRECEDING_GROUPS), "EXPR_PRECEDING_GROUPS" },
4289+
{ static_cast<uint32_t>(WindowBoundary::EXPR_FOLLOWING_GROUPS), "EXPR_FOLLOWING_GROUPS" }
42874290
};
42884291
return values;
42894292
}
42904293

42914294
template<>
42924295
const char* EnumUtil::ToChars<WindowBoundary>(WindowBoundary value) {
4293-
return StringUtil::EnumToString(GetWindowBoundaryValues(), 9, "WindowBoundary", static_cast<uint32_t>(value));
4296+
return StringUtil::EnumToString(GetWindowBoundaryValues(), 12, "WindowBoundary", static_cast<uint32_t>(value));
42944297
}
42954298

42964299
template<>
42974300
WindowBoundary EnumUtil::FromString<WindowBoundary>(const char *value) {
4298-
return static_cast<WindowBoundary>(StringUtil::StringToEnum(GetWindowBoundaryValues(), 9, "WindowBoundary", value));
4301+
return static_cast<WindowBoundary>(StringUtil::StringToEnum(GetWindowBoundaryValues(), 12, "WindowBoundary", value));
42994302
}
43004303

43014304
const StringUtil::EnumStringLiteral *GetWindowExcludeModeValues() {

src/duckdb/src/execution/operator/csv_scanner/sniffer/header_detection.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -335,7 +335,7 @@ void CSVSniffer::DetectHeader() {
335335
auto &sniffer_state_machine = best_candidate->GetStateMachine();
336336
names = DetectHeaderInternal(buffer_manager->context, best_header_row, sniffer_state_machine, set_columns,
337337
best_sql_types_candidates_per_column_idx, options, *error_handler);
338-
if (single_row_file && sniffer_state_machine.dialect_options.header.GetValue()) {
338+
if (EmptyOrOnlyHeader()) {
339339
// This file only contains a header, lets default to the lowest type of all.
340340
detected_types.clear();
341341
for (idx_t i = 0; i < names.size(); i++) {

src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,10 @@ idx_t CSVSniffer::LinesSniffed() const {
9999
return lines_sniffed;
100100
}
101101

102+
bool CSVSniffer::EmptyOrOnlyHeader() const {
103+
return (single_row_file && best_candidate->state_machine->dialect_options.header.GetValue()) || lines_sniffed == 0;
104+
}
105+
102106
bool CSVSniffer::CanYouCastIt(ClientContext &context, const string_t value, const LogicalType &type,
103107
const DialectOptions &dialect_options, const bool is_null, const char decimal_separator) {
104108
if (is_null) {

src/duckdb/src/execution/operator/persistent/physical_delete.cpp

Lines changed: 56 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,10 @@ class DeleteLocalState : public LocalSinkState {
4747
public:
4848
DeleteLocalState(ClientContext &context, TableCatalogEntry &table,
4949
const vector<unique_ptr<BoundConstraint>> &bound_constraints) {
50-
delete_chunk.Initialize(Allocator::Get(context), table.GetTypes());
50+
const auto &types = table.GetTypes();
51+
auto initialize = vector<bool>(types.size(), false);
52+
delete_chunk.Initialize(Allocator::Get(context), types, initialize);
53+
5154
auto &storage = table.GetStorage();
5255
delete_state = storage.InitializeDelete(table, context, bound_constraints);
5356
}
@@ -64,22 +67,66 @@ SinkResultType PhysicalDelete::Sink(ExecutionContext &context, DataChunk &chunk,
6467
auto &transaction = DuckTransaction::Get(context.client, table.db);
6568
auto &row_ids = chunk.data[row_id_index];
6669

67-
vector<StorageIndex> column_ids;
68-
for (idx_t i = 0; i < table.ColumnCount(); i++) {
69-
column_ids.emplace_back(i);
70-
};
71-
auto fetch_state = ColumnFetchState();
72-
7370
lock_guard<mutex> delete_guard(g_state.delete_lock);
7471
if (!return_chunk && !g_state.has_unique_indexes) {
7572
g_state.deleted_count += table.Delete(*l_state.delete_state, context.client, row_ids, chunk.size());
7673
return SinkResultType::NEED_MORE_INPUT;
7774
}
7875

79-
// Fetch the to-be-deleted chunk.
76+
auto types = table.GetTypes();
77+
auto to_be_fetched = vector<bool>(types.size(), return_chunk);
78+
vector<StorageIndex> column_ids;
79+
vector<LogicalType> column_types;
80+
if (return_chunk) {
81+
// Fetch all columns.
82+
column_types = types;
83+
for (idx_t i = 0; i < table.ColumnCount(); i++) {
84+
column_ids.emplace_back(i);
85+
}
86+
87+
} else {
88+
// Fetch only the required columns for updating the delete indexes.
89+
auto &local_storage = LocalStorage::Get(context.client, table.db);
90+
auto storage = local_storage.GetStorage(table);
91+
unordered_set<column_t> indexed_column_id_set;
92+
storage->delete_indexes.Scan([&](Index &index) {
93+
if (!index.IsBound() || !index.IsUnique()) {
94+
return false;
95+
}
96+
auto &set = index.GetColumnIdSet();
97+
indexed_column_id_set.insert(set.begin(), set.end());
98+
return false;
99+
});
100+
for (auto &col : indexed_column_id_set) {
101+
column_ids.emplace_back(col);
102+
}
103+
sort(column_ids.begin(), column_ids.end());
104+
for (auto &col : column_ids) {
105+
auto i = col.GetPrimaryIndex();
106+
to_be_fetched[i] = true;
107+
column_types.push_back(types[i]);
108+
}
109+
}
110+
80111
l_state.delete_chunk.Reset();
81112
row_ids.Flatten(chunk.size());
82-
table.Fetch(transaction, l_state.delete_chunk, column_ids, row_ids, chunk.size(), fetch_state);
113+
114+
// Fetch the to-be-deleted chunk.
115+
DataChunk fetch_chunk;
116+
fetch_chunk.Initialize(Allocator::Get(context.client), column_types, chunk.size());
117+
auto fetch_state = ColumnFetchState();
118+
table.Fetch(transaction, fetch_chunk, column_ids, row_ids, chunk.size(), fetch_state);
119+
120+
// Reference the necessary columns of the fetch_chunk.
121+
idx_t fetch_idx = 0;
122+
for (idx_t i = 0; i < table.ColumnCount(); i++) {
123+
if (to_be_fetched[i]) {
124+
l_state.delete_chunk.data[i].Reference(fetch_chunk.data[fetch_idx++]);
125+
continue;
126+
}
127+
l_state.delete_chunk.data[i].Reference(Value(types[i]));
128+
}
129+
l_state.delete_chunk.SetCardinality(fetch_chunk);
83130

84131
// Append the deleted row IDs to the delete indexes.
85132
// If we only delete local row IDs, then the delete_chunk is empty.

src/duckdb/src/execution/operator/persistent/physical_insert.cpp

Lines changed: 46 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -80,15 +80,17 @@ InsertGlobalState::InsertGlobalState(ClientContext &context, const vector<Logica
8080
: table(table), insert_count(0), initialized(false), return_collection(context, return_types) {
8181
}
8282

83-
InsertLocalState::InsertLocalState(ClientContext &context, const vector<LogicalType> &types,
83+
InsertLocalState::InsertLocalState(ClientContext &context, const vector<LogicalType> &types_p,
8484
const vector<unique_ptr<Expression>> &bound_defaults,
8585
const vector<unique_ptr<BoundConstraint>> &bound_constraints)
8686
: default_executor(context, bound_defaults), bound_constraints(bound_constraints) {
8787

8888
auto &allocator = Allocator::Get(context);
89-
insert_chunk.Initialize(allocator, types);
90-
update_chunk.Initialize(allocator, types);
91-
append_chunk.Initialize(allocator, types);
89+
90+
types = types_p;
91+
auto initialize = vector<bool>(types.size(), false);
92+
update_chunk.Initialize(allocator, types, initialize);
93+
append_chunk.Initialize(allocator, types, initialize);
9294
}
9395

9496
ConstraintState &InsertLocalState::GetConstraintState(DataTable &table, TableCatalogEntry &table_ref) {
@@ -185,8 +187,10 @@ static void CombineExistingAndInsertTuples(DataChunk &result, DataChunk &scan_ch
185187
auto &insert_types = op.insert_types;
186188

187189
if (types_to_fetch.empty()) {
188-
// We have not scanned the initial table, so we can just duplicate the initial chunk
189-
result.Initialize(client, input_chunk.GetTypes());
190+
// We have not scanned the initial table, so we duplicate the initial chunk.
191+
const auto &types = input_chunk.GetTypes();
192+
auto initialize = vector<bool>(types.size(), false);
193+
result.Initialize(client, types, initialize, input_chunk.size());
190194
result.Reference(input_chunk);
191195
result.SetCardinality(input_chunk);
192196
return;
@@ -196,7 +200,7 @@ static void CombineExistingAndInsertTuples(DataChunk &result, DataChunk &scan_ch
196200
combined_types.insert(combined_types.end(), insert_types.begin(), insert_types.end());
197201
combined_types.insert(combined_types.end(), types_to_fetch.begin(), types_to_fetch.end());
198202

199-
result.Initialize(client, combined_types);
203+
result.Initialize(client, combined_types, input_chunk.size());
200204
result.Reset();
201205
// Add the VALUES list
202206
for (idx_t i = 0; i < insert_types.size(); i++) {
@@ -223,12 +227,13 @@ static void CombineExistingAndInsertTuples(DataChunk &result, DataChunk &scan_ch
223227
result.SetCardinality(input_chunk.size());
224228
}
225229

226-
static void CreateUpdateChunk(ExecutionContext &context, DataChunk &chunk, TableCatalogEntry &table, Vector &row_ids,
227-
DataChunk &update_chunk, const PhysicalInsert &op) {
230+
static void CreateUpdateChunk(ExecutionContext &context, DataChunk &chunk, Vector &row_ids, DataChunk &update_chunk,
231+
const PhysicalInsert &op) {
228232

229233
auto &do_update_condition = op.do_update_condition;
230234
auto &set_types = op.set_types;
231235
auto &set_expressions = op.set_expressions;
236+
232237
// Check the optional condition for the DO UPDATE clause, to filter which rows will be updated
233238
if (do_update_condition) {
234239
DataChunk do_update_filter_result;
@@ -255,8 +260,15 @@ static void CreateUpdateChunk(ExecutionContext &context, DataChunk &chunk, Table
255260
}
256261
}
257262

258-
// Execute the SET expressions
259-
update_chunk.Initialize(context.client, set_types);
263+
if (chunk.size() == 0) {
264+
auto initialize = vector<bool>(set_types.size(), false);
265+
update_chunk.Initialize(context.client, set_types, initialize, chunk.size());
266+
update_chunk.SetCardinality(chunk);
267+
return;
268+
}
269+
270+
// Execute the SET expressions.
271+
update_chunk.Initialize(context.client, set_types, chunk.size());
260272
ExpressionExecutor executor(context.client, set_expressions);
261273
executor.Execute(chunk, update_chunk);
262274
update_chunk.SetCardinality(chunk);
@@ -272,7 +284,7 @@ static idx_t PerformOnConflictAction(InsertLocalState &lstate, ExecutionContext
272284

273285
auto &set_columns = op.set_columns;
274286
DataChunk update_chunk;
275-
CreateUpdateChunk(context, chunk, table, row_ids, update_chunk, op);
287+
CreateUpdateChunk(context, chunk, row_ids, update_chunk, op);
276288
auto &data_table = table.GetStorage();
277289

278290
// Perform the UPDATE on the (global) storage.
@@ -476,7 +488,9 @@ static idx_t HandleInsertConflicts(TableCatalogEntry &table, ExecutionContext &c
476488
DataChunk combined_chunk; // contains conflict_chunk + scan_chunk (wide)
477489

478490
// Filter out everything but the conflicting rows
479-
conflict_chunk.Initialize(context.client, tuples.GetTypes());
491+
const auto &types = tuples.GetTypes();
492+
auto initialize = vector<bool>(types.size(), false);
493+
conflict_chunk.Initialize(context.client, types, initialize, tuples.size());
480494
conflict_chunk.Reference(tuples);
481495
conflict_chunk.Slice(conflicts.Selection(), conflicts.Count());
482496
conflict_chunk.SetCardinality(conflicts.Count());
@@ -487,7 +501,7 @@ static idx_t HandleInsertConflicts(TableCatalogEntry &table, ExecutionContext &c
487501
D_ASSERT(scan_chunk.size() == 0);
488502
// When these values are required for the conditions or the SET expressions,
489503
// then we scan the existing table for the conflicting tuples, using the rowids
490-
scan_chunk.Initialize(context.client, types_to_fetch);
504+
scan_chunk.Initialize(context.client, types_to_fetch, conflicts.Count());
491505
fetch_state = make_uniq<ColumnFetchState>();
492506
if (GLOBAL) {
493507
auto &transaction = DuckTransaction::Get(context.client, table.catalog);
@@ -520,7 +534,7 @@ static idx_t HandleInsertConflicts(TableCatalogEntry &table, ExecutionContext &c
520534
return affected_tuples;
521535
}
522536

523-
idx_t PhysicalInsert::OnConflictHandling(TableCatalogEntry &table, ExecutionContext &context, InsertGlobalState &gstate,
537+
idx_t PhysicalInsert::OnConflictHandling(TableCatalogEntry &table, ExecutionContext &context,
524538
InsertLocalState &lstate) const {
525539
auto &data_table = table.GetStorage();
526540
auto &local_storage = LocalStorage::Get(context.client, data_table.db);
@@ -620,6 +634,21 @@ SinkResultType PhysicalInsert::Sink(ExecutionContext &context, DataChunk &chunk,
620634

621635
auto &table = gstate.table;
622636
auto &storage = table.GetStorage();
637+
if (lstate.init_insert_chunk) {
638+
auto initialize = vector<bool>(lstate.types.size(), false);
639+
if (!column_index_map.empty()) {
640+
for (auto &col : table.GetColumns().Physical()) {
641+
auto storage_idx = col.StorageOid();
642+
auto mapped_index = column_index_map[col.Physical()];
643+
if (mapped_index == DConstants::INVALID_INDEX) {
644+
initialize[storage_idx] = true;
645+
}
646+
}
647+
}
648+
auto &allocator = Allocator::Get(context.client);
649+
lstate.insert_chunk.Initialize(allocator, lstate.types, initialize, chunk.size());
650+
lstate.init_insert_chunk = false;
651+
}
623652
PhysicalInsert::ResolveDefaults(table, chunk, column_index_map, lstate.default_executor, lstate.insert_chunk);
624653

625654
if (!parallel) {
@@ -634,7 +663,7 @@ SinkResultType PhysicalInsert::Sink(ExecutionContext &context, DataChunk &chunk,
634663
// so it should not be added to the RETURNING chunk
635664
gstate.return_collection.Append(lstate.insert_chunk);
636665
}
637-
idx_t updated_tuples = OnConflictHandling(table, context, gstate, lstate);
666+
idx_t updated_tuples = OnConflictHandling(table, context, lstate);
638667
if (action_type == OnConflictAction::NOTHING && return_chunk) {
639668
// Because we didn't add to the RETURNING chunk yet
640669
// we add the tuples that did not get filtered out now
@@ -665,7 +694,7 @@ SinkResultType PhysicalInsert::Sink(ExecutionContext &context, DataChunk &chunk,
665694
lstate.local_collection->InitializeAppend(lstate.local_append_state);
666695
lstate.writer = &gstate.table.GetStorage().CreateOptimisticWriter(context.client);
667696
}
668-
OnConflictHandling(table, context, gstate, lstate);
697+
OnConflictHandling(table, context, lstate);
669698
D_ASSERT(action_type != OnConflictAction::UPDATE);
670699

671700
auto new_row_group = lstate.local_collection->Append(lstate.insert_chunk, lstate.local_append_state);

src/duckdb/src/function/compression_config.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,10 @@ static optional_ptr<CompressionFunction> LoadCompressionFunction(CompressionFunc
6565

6666
static void TryLoadCompression(DBConfig &config, vector<reference<CompressionFunction>> &result, CompressionType type,
6767
const PhysicalType physical_type) {
68+
if (config.options.disabled_compression_methods.find(type) != config.options.disabled_compression_methods.end()) {
69+
// explicitly disabled
70+
return;
71+
}
6872
auto function = config.GetCompressionFunction(type, physical_type);
6973
if (!function) {
7074
return;

src/duckdb/src/function/table/arrow/arrow_duck_schema.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ void ArrowType::ThrowIfInvalid() const {
5656
}
5757
}
5858

59-
unique_ptr<ArrowType> ArrowType::GetTypeFromFormat(DBConfig &config, ArrowSchema &schema, string &format) {
59+
unique_ptr<ArrowType> ArrowType::GetTypeFromFormat(string &format) {
6060
if (format == "n") {
6161
return make_uniq<ArrowType>(LogicalType::SQLNULL);
6262
} else if (format == "b") {
@@ -179,6 +179,14 @@ unique_ptr<ArrowType> ArrowType::GetTypeFromFormat(DBConfig &config, ArrowSchema
179179
}
180180
return make_uniq<ArrowType>(LogicalType::TIMESTAMP_TZ, std::move(type_info));
181181
}
182+
return nullptr;
183+
}
184+
185+
unique_ptr<ArrowType> ArrowType::GetTypeFromFormat(DBConfig &config, ArrowSchema &schema, string &format) {
186+
auto type = GetTypeFromFormat(format);
187+
if (type) {
188+
return type;
189+
}
182190
if (format == "+l") {
183191
return CreateListType(config, *schema.children[0], ArrowVariableSizeType::NORMAL, false);
184192
} else if (format == "+L") {

0 commit comments

Comments
 (0)