Skip to content

Commit c996d56

Browse files
duckdblabs-bot, hrl20
authored and committed
Update vendored DuckDB sources to 9d02a50
1 parent c2a7181 commit c996d56

File tree

20 files changed

+191
-78
lines changed

20 files changed

+191
-78
lines changed

src/duckdb/extension/parquet/parquet_reader.cpp

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -778,12 +778,13 @@ void ParquetReader::PrepareRowGroupBuffer(ParquetReaderScanState &state, idx_t c
778778
FilterPropagateResult prune_result;
779779
// TODO we might not have stats but STILL a bloom filter so move this up
780780
// check the bloom filter if present
781-
if (!column_reader.Type().IsNested() &&
781+
bool is_generated_column = column_reader.FileIdx() >= group.columns.size();
782+
if (!column_reader.Type().IsNested() && !is_generated_column &&
782783
ParquetStatisticsUtils::BloomFilterSupported(column_reader.Type().id()) &&
783784
ParquetStatisticsUtils::BloomFilterExcludes(filter, group.columns[column_reader.FileIdx()].meta_data,
784785
*state.thrift_file_proto, allocator)) {
785786
prune_result = FilterPropagateResult::FILTER_ALWAYS_FALSE;
786-
} else if (column_reader.Type().id() == LogicalTypeId::VARCHAR &&
787+
} else if (column_reader.Type().id() == LogicalTypeId::VARCHAR && !is_generated_column &&
787788
group.columns[column_reader.FileIdx()].meta_data.statistics.__isset.min_value &&
788789
group.columns[column_reader.FileIdx()].meta_data.statistics.__isset.max_value) {
789790

src/duckdb/src/common/arrow/arrow_converter.cpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -73,6 +73,7 @@ void SetArrowMapFormat(DuckDBArrowSchemaHolder &root_holder, ArrowSchema &child,
7373
InitializeChild(root_holder.nested_children.back()[0], root_holder);
7474
child.children = &root_holder.nested_children_ptr.back()[0];
7575
child.children[0]->name = "entries";
76+
child.children[0]->flags = 0; // Set the 'entries' field to non-nullable
7677
SetArrowFormat(root_holder, **child.children, ListType::GetChildType(type), options, context);
7778
}
7879

src/duckdb/src/common/types/column/column_data_allocator.cpp

Lines changed: 0 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -54,12 +54,7 @@ ColumnDataAllocator::~ColumnDataAllocator() {
5454
for (auto &block : blocks) {
5555
block.handle->SetDestroyBufferUpon(DestroyBufferUpon::UNPIN);
5656
}
57-
const auto data_size = SizeInBytes();
5857
blocks.clear();
59-
if (Allocator::SupportsFlush() &&
60-
data_size > alloc.buffer_manager->GetBufferPool().GetAllocatorBulkDeallocationFlushThreshold()) {
61-
Allocator::FlushAll();
62-
}
6358
}
6459

6560
BufferHandle ColumnDataAllocator::Pin(uint32_t block_id) {

src/duckdb/src/common/types/row/tuple_data_segment.cpp

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -119,10 +119,6 @@ TupleDataSegment::~TupleDataSegment() {
119119
}
120120
pinned_row_handles.clear();
121121
pinned_heap_handles.clear();
122-
if (Allocator::SupportsFlush() && allocator &&
123-
data_size > allocator->GetBufferManager().GetBufferPool().GetAllocatorBulkDeallocationFlushThreshold()) {
124-
Allocator::FlushAll();
125-
}
126122
allocator.reset();
127123
}
128124

src/duckdb/src/execution/operator/join/physical_hash_join.cpp

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -638,7 +638,6 @@ void JoinFilterPushdownInfo::PushInFilter(const JoinFilterPushdownFilter &info,
638638

639639
// generate the OR filter
640640
auto in_filter = make_uniq<InFilter>(std::move(in_list));
641-
in_filter->origin_is_hash_join = true;
642641

643642
// we push the OR filter as an OptionalFilter so that we can use it for zonemap pruning only
644643
// the IN-list is expensive to execute otherwise

src/duckdb/src/execution/radix_partitioned_hashtable.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -281,12 +281,12 @@ idx_t RadixHTConfig::GetRadixBits() const {
281281
}
282282

283283
void RadixHTConfig::SetRadixBitsInternal(const idx_t radix_bits_p, bool external) {
284-
if (sink_radix_bits >= radix_bits_p || sink.any_combined) {
284+
if (sink_radix_bits > radix_bits_p || sink.any_combined) {
285285
return;
286286
}
287287

288288
auto guard = sink.Lock();
289-
if (sink_radix_bits >= radix_bits_p || sink.any_combined) {
289+
if (sink_radix_bits > radix_bits_p || sink.any_combined) {
290290
return;
291291
}
292292

src/duckdb/src/function/table/table_scan.cpp

Lines changed: 113 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,7 @@
2424
#include "duckdb/planner/expression/bound_constant_expression.hpp"
2525
#include "duckdb/planner/expression/bound_comparison_expression.hpp"
2626
#include "duckdb/planner/filter/conjunction_filter.hpp"
27+
#include "duckdb/common/types/value_map.hpp"
2728

2829
namespace duckdb {
2930

@@ -361,56 +362,137 @@ unique_ptr<GlobalTableFunctionState> DuckIndexScanInitGlobal(ClientContext &cont
361362
return std::move(g_state);
362363
}
363364

364-
void ExtractInFilter(unique_ptr<TableFilter> &filter, BoundColumnRefExpression &bound_ref,
365-
unique_ptr<vector<unique_ptr<Expression>>> &filter_expressions) {
366-
// Special-handling of IN filters.
367-
// They are part of a CONJUNCTION_AND.
368-
if (filter->filter_type != TableFilterType::CONJUNCTION_AND) {
369-
return;
365+
void ExtractExpressionsFromValues(value_set_t &unique_values, BoundColumnRefExpression &bound_ref,
366+
vector<unique_ptr<Expression>> &expressions) {
367+
for (const auto &value : unique_values) {
368+
auto bound_constant = make_uniq<BoundConstantExpression>(value);
369+
auto filter_expr = make_uniq<BoundComparisonExpression>(ExpressionType::COMPARE_EQUAL, bound_ref.Copy(),
370+
std::move(bound_constant));
371+
expressions.push_back(std::move(filter_expr));
370372
}
373+
}
371374

372-
auto &and_filter = filter->Cast<ConjunctionAndFilter>();
373-
auto &children = and_filter.child_filters;
374-
if (children.empty()) {
375-
return;
375+
void ExtractIn(InFilter &filter, BoundColumnRefExpression &bound_ref, vector<unique_ptr<Expression>> &expressions) {
376+
// Eliminate any duplicates.
377+
value_set_t unique_values;
378+
for (const auto &value : filter.values) {
379+
if (unique_values.find(value) == unique_values.end()) {
380+
unique_values.insert(value);
381+
}
376382
}
377-
if (children[0]->filter_type != TableFilterType::OPTIONAL_FILTER) {
383+
ExtractExpressionsFromValues(unique_values, bound_ref, expressions);
384+
}
385+
386+
void ExtractConjunctionAnd(ConjunctionAndFilter &filter, BoundColumnRefExpression &bound_ref,
387+
vector<unique_ptr<Expression>> &expressions) {
388+
if (filter.child_filters.empty()) {
378389
return;
379390
}
380391

381-
auto &optional_filter = children[0]->Cast<OptionalFilter>();
382-
auto &child = optional_filter.child_filter;
383-
if (child->filter_type != TableFilterType::IN_FILTER) {
392+
// Extract the CONSTANT_COMPARISON and IN_FILTER children.
393+
vector<reference<ConstantFilter>> comparisons;
394+
vector<reference<InFilter>> in_filters;
395+
396+
for (idx_t i = 0; i < filter.child_filters.size(); i++) {
397+
if (filter.child_filters[i]->filter_type == TableFilterType::CONSTANT_COMPARISON) {
398+
auto &comparison = filter.child_filters[i]->Cast<ConstantFilter>();
399+
comparisons.push_back(comparison);
400+
continue;
401+
}
402+
403+
if (filter.child_filters[i]->filter_type == TableFilterType::OPTIONAL_FILTER) {
404+
auto &optional_filter = filter.child_filters[i]->Cast<OptionalFilter>();
405+
if (!optional_filter.child_filter) {
406+
return;
407+
}
408+
if (optional_filter.child_filter->filter_type != TableFilterType::IN_FILTER) {
409+
// No support for other optional filter types yet.
410+
return;
411+
}
412+
auto &in_filter = optional_filter.child_filter->Cast<InFilter>();
413+
in_filters.push_back(in_filter);
414+
continue;
415+
}
416+
417+
// No support for other filter types than CONSTANT_COMPARISON and IN_FILTER in CONJUNCTION_AND yet.
384418
return;
385419
}
386420

387-
auto &in_filter = child->Cast<InFilter>();
388-
if (!in_filter.origin_is_hash_join) {
421+
// No support for other CONJUNCTION_AND cases yet.
422+
if (in_filters.empty()) {
389423
return;
390424
}
391425

392-
// They are all on the same column, so we can split them.
393-
for (const auto &value : in_filter.values) {
394-
auto bound_constant = make_uniq<BoundConstantExpression>(value);
395-
auto filter_expr = make_uniq<BoundComparisonExpression>(ExpressionType::COMPARE_EQUAL, bound_ref.Copy(),
396-
std::move(bound_constant));
397-
filter_expressions->push_back(std::move(filter_expr));
426+
// Get the combined unique values of the IN filters.
427+
value_set_t unique_values;
428+
for (idx_t filter_idx = 0; filter_idx < in_filters.size(); filter_idx++) {
429+
auto &in_filter = in_filters[filter_idx].get();
430+
for (idx_t value_idx = 0; value_idx < in_filter.values.size(); value_idx++) {
431+
auto &value = in_filter.values[value_idx];
432+
if (unique_values.find(value) != unique_values.end()) {
433+
continue;
434+
}
435+
unique_values.insert(value);
436+
}
437+
}
438+
439+
// Extract all qualifying values.
440+
for (auto value_it = unique_values.begin(); value_it != unique_values.end();) {
441+
bool qualifies = true;
442+
for (idx_t comp_idx = 0; comp_idx < comparisons.size(); comp_idx++) {
443+
if (!comparisons[comp_idx].get().Compare(*value_it)) {
444+
qualifies = false;
445+
value_it = unique_values.erase(value_it);
446+
break;
447+
}
448+
}
449+
if (qualifies) {
450+
value_it++;
451+
}
452+
}
453+
454+
ExtractExpressionsFromValues(unique_values, bound_ref, expressions);
455+
}
456+
457+
void ExtractFilter(TableFilter &filter, BoundColumnRefExpression &bound_ref,
458+
vector<unique_ptr<Expression>> &expressions) {
459+
switch (filter.filter_type) {
460+
case TableFilterType::OPTIONAL_FILTER: {
461+
auto &optional_filter = filter.Cast<OptionalFilter>();
462+
if (!optional_filter.child_filter) {
463+
return;
464+
}
465+
return ExtractFilter(*optional_filter.child_filter, bound_ref, expressions);
466+
}
467+
case TableFilterType::IN_FILTER: {
468+
auto &in_filter = filter.Cast<InFilter>();
469+
ExtractIn(in_filter, bound_ref, expressions);
470+
return;
471+
}
472+
case TableFilterType::CONJUNCTION_AND: {
473+
auto &conjunction_and = filter.Cast<ConjunctionAndFilter>();
474+
ExtractConjunctionAnd(conjunction_and, bound_ref, expressions);
475+
return;
476+
}
477+
default:
478+
return;
398479
}
399480
}
400481

401-
unique_ptr<vector<unique_ptr<Expression>>> ExtractFilters(const ColumnDefinition &col, unique_ptr<TableFilter> &filter,
402-
idx_t storage_idx) {
482+
vector<unique_ptr<Expression>> ExtractFilterExpressions(const ColumnDefinition &col, unique_ptr<TableFilter> &filter,
483+
idx_t storage_idx) {
403484
ColumnBinding binding(0, storage_idx);
404485
auto bound_ref = make_uniq<BoundColumnRefExpression>(col.Name(), col.Type(), binding);
405486

406-
auto filter_expressions = make_uniq<vector<unique_ptr<Expression>>>();
407-
ExtractInFilter(filter, *bound_ref, filter_expressions);
487+
vector<unique_ptr<Expression>> expressions;
488+
ExtractFilter(*filter, *bound_ref, expressions);
408489

409-
if (filter_expressions->empty()) {
490+
// Attempt matching the top-level filter to the index expression.
491+
if (expressions.empty()) {
410492
auto filter_expr = filter->ToExpression(*bound_ref);
411-
filter_expressions->push_back(std::move(filter_expr));
493+
expressions.push_back(std::move(filter_expr));
412494
}
413-
return filter_expressions;
495+
return expressions;
414496
}
415497

416498
bool TryScanIndex(ART &art, const ColumnList &column_list, TableFunctionInitInput &input, TableFilterSet &filter_set,
@@ -453,8 +535,8 @@ bool TryScanIndex(ART &art, const ColumnList &column_list, TableFunctionInitInpu
453535
return false;
454536
}
455537

456-
auto filter_expressions = ExtractFilters(col, filter->second, storage_index.GetIndex());
457-
for (const auto &filter_expr : *filter_expressions) {
538+
auto expressions = ExtractFilterExpressions(col, filter->second, storage_index.GetIndex());
539+
for (const auto &filter_expr : expressions) {
458540
auto scan_state = art.TryInitializeScan(*index_expr, *filter_expr);
459541
if (!scan_state) {
460542
return false;

src/duckdb/src/function/table/version/pragma_version.cpp

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
#ifndef DUCKDB_PATCH_VERSION
2-
#define DUCKDB_PATCH_VERSION "4-dev4648"
2+
#define DUCKDB_PATCH_VERSION "4-dev4679"
33
#endif
44
#ifndef DUCKDB_MINOR_VERSION
55
#define DUCKDB_MINOR_VERSION 1
@@ -8,10 +8,10 @@
88
#define DUCKDB_MAJOR_VERSION 1
99
#endif
1010
#ifndef DUCKDB_VERSION
11-
#define DUCKDB_VERSION "v1.1.4-dev4648"
11+
#define DUCKDB_VERSION "v1.1.4-dev4679"
1212
#endif
1313
#ifndef DUCKDB_SOURCE_ID
14-
#define DUCKDB_SOURCE_ID "09cef57892"
14+
#define DUCKDB_SOURCE_ID "0024e5d4be"
1515
#endif
1616
#include "duckdb/function/table/system_functions.hpp"
1717
#include "duckdb/main/database.hpp"

src/duckdb/src/include/duckdb/planner/filter/constant_filter.hpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -27,6 +27,7 @@ class ConstantFilter : public TableFilter {
2727
Value constant;
2828

2929
public:
30+
bool Compare(const Value &value) const;
3031
FilterPropagateResult CheckStatistics(BaseStatistics &stats) override;
3132
string ToString(const string &column_name) override;
3233
bool Equals(const TableFilter &other) const override;

src/duckdb/src/include/duckdb/planner/filter/in_filter.hpp

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -19,10 +19,8 @@ class InFilter : public TableFilter {
1919

2020
public:
2121
explicit InFilter(vector<Value> values);
22-
InFilter(vector<Value> values, bool origin_is_hash_join);
2322

2423
vector<Value> values;
25-
bool origin_is_hash_join;
2624

2725
public:
2826
FilterPropagateResult CheckStatistics(BaseStatistics &stats) override;

0 commit comments

Comments
 (0)