Skip to content

Commit b21a996

Browse files
Update vendored DuckDB sources to 5bd5afa
1 parent 5bd5afa commit b21a996

File tree

16 files changed

+154
-57
lines changed

16 files changed

+154
-57
lines changed

src/duckdb/extension/core_functions/aggregate/holistic/approximate_quantile.cpp

Lines changed: 49 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,6 @@
66
#include "duckdb/common/serializer/serializer.hpp"
77
#include "duckdb/common/serializer/deserializer.hpp"
88

9-
#include <algorithm>
10-
#include <cmath>
119
#include <stdlib.h>
1210

1311
namespace duckdb {
@@ -17,6 +15,50 @@ struct ApproxQuantileState {
1715
idx_t pos;
1816
};
1917

18+
struct ApproxQuantileCoding {
19+
template <typename INPUT_TYPE, typename SAVE_TYPE>
20+
static SAVE_TYPE Encode(const INPUT_TYPE &input) {
21+
return Cast::template Operation<INPUT_TYPE, SAVE_TYPE>(input);
22+
}
23+
24+
template <typename SAVE_TYPE, typename TARGET_TYPE>
25+
static bool Decode(const SAVE_TYPE &source, TARGET_TYPE &target) {
26+
// The result is approximate, so clamp instead of overflowing.
27+
if (TryCast::Operation(source, target, false)) {
28+
return true;
29+
} else if (source < 0) {
30+
target = NumericLimits<TARGET_TYPE>::Minimum();
31+
} else {
32+
target = NumericLimits<TARGET_TYPE>::Maximum();
33+
}
34+
return false;
35+
}
36+
};
37+
38+
template <>
39+
double ApproxQuantileCoding::Encode(const dtime_tz_t &input) {
40+
return Encode<uint64_t, double>(input.sort_key());
41+
}
42+
43+
template <>
44+
bool ApproxQuantileCoding::Decode(const double &source, dtime_tz_t &target) {
45+
uint64_t sort_key;
46+
const auto decoded = Decode<double, uint64_t>(source, sort_key);
47+
if (decoded) {
48+
// We can invert the sort key because its offset was not touched.
49+
auto offset = dtime_tz_t::decode_offset(sort_key);
50+
auto micros = dtime_tz_t::decode_micros(sort_key);
51+
micros -= int64_t(dtime_tz_t::encode_offset(offset) * dtime_tz_t::OFFSET_MICROS);
52+
target = dtime_tz_t(dtime_t(micros), offset);
53+
} else if (source < 0) {
54+
target = Value::MinimumValue(LogicalTypeId::TIME_TZ).GetValue<dtime_tz_t>();
55+
} else {
56+
target = Value::MaximumValue(LogicalTypeId::TIME_TZ).GetValue<dtime_tz_t>();
57+
}
58+
59+
return decoded;
60+
}
61+
2062
struct ApproximateQuantileBindData : public FunctionData {
2163
ApproximateQuantileBindData() {
2264
}
@@ -73,7 +115,7 @@ struct ApproxQuantileOperation {
73115

74116
template <class INPUT_TYPE, class STATE, class OP>
75117
static void Operation(STATE &state, const INPUT_TYPE &input, AggregateUnaryInput &unary_input) {
76-
auto val = Cast::template Operation<INPUT_TYPE, SAVE_TYPE>(input);
118+
auto val = ApproxQuantileCoding::template Encode<INPUT_TYPE, SAVE_TYPE>(input);
77119
if (!Value::DoubleIsFinite(val)) {
78120
return;
79121
}
@@ -121,15 +163,8 @@ struct ApproxQuantileScalarOperation : public ApproxQuantileOperation {
121163
state.h->compress();
122164
auto &bind_data = finalize_data.input.bind_data->template Cast<ApproximateQuantileBindData>();
123165
D_ASSERT(bind_data.quantiles.size() == 1);
124-
// The result is approximate, so clamp instead of overflowing.
125166
const auto source = state.h->quantile(bind_data.quantiles[0]);
126-
if (TryCast::Operation(source, target, false)) {
127-
return;
128-
} else if (source < 0) {
129-
target = NumericLimits<TARGET_TYPE>::Minimum();
130-
} else {
131-
target = NumericLimits<TARGET_TYPE>::Maximum();
132-
}
167+
ApproxQuantileCoding::Decode(source, target);
133168
}
134169
};
135170

@@ -281,7 +316,9 @@ struct ApproxQuantileListOperation : public ApproxQuantileOperation {
281316
entry.length = bind_data.quantiles.size();
282317
for (size_t q = 0; q < entry.length; ++q) {
283318
const auto &quantile = bind_data.quantiles[q];
284-
rdata[ridx + q] = Cast::template Operation<SAVE_TYPE, CHILD_TYPE>(state.h->quantile(quantile));
319+
const auto &source = state.h->quantile(quantile);
320+
auto &target = rdata[ridx + q];
321+
ApproxQuantileCoding::Decode(source, target);
285322
}
286323

287324
ListVector::SetListSize(finalize_data.result, entry.offset + entry.length);

src/duckdb/src/common/gzip_file_system.cpp

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ void MiniZStreamWrapper::Initialize(CompressedFile &file, bool write) {
120120
} else {
121121
idx_t data_start = GZIP_HEADER_MINSIZE;
122122
auto read_count = file.child_handle->Read(gzip_hdr, GZIP_HEADER_MINSIZE);
123-
GZipFileSystem::VerifyGZIPHeader(gzip_hdr, NumericCast<idx_t>(read_count));
123+
GZipFileSystem::VerifyGZIPHeader(gzip_hdr, NumericCast<idx_t>(read_count), &file);
124124
// Skip over the extra field if necessary
125125
if (gzip_hdr[3] & GZIP_FLAG_EXTRA) {
126126
uint8_t gzip_xlen[2];
@@ -157,7 +157,7 @@ bool MiniZStreamWrapper::Read(StreamData &sd) {
157157
auto body_ptr = sd.in_buff_start + GZIP_FOOTER_SIZE;
158158
uint8_t gzip_hdr[GZIP_HEADER_MINSIZE];
159159
memcpy(gzip_hdr, body_ptr, GZIP_HEADER_MINSIZE);
160-
GZipFileSystem::VerifyGZIPHeader(gzip_hdr, GZIP_HEADER_MINSIZE);
160+
GZipFileSystem::VerifyGZIPHeader(gzip_hdr, GZIP_HEADER_MINSIZE, nullptr);
161161
body_ptr += GZIP_HEADER_MINSIZE;
162162
if (gzip_hdr[3] & GZIP_FLAG_EXTRA) {
163163
auto xlen = NumericCast<idx_t>((uint8_t)*body_ptr | (uint8_t) * (body_ptr + 1) << 8);
@@ -306,19 +306,22 @@ class GZipFile : public CompressedFile {
306306
GZipFileSystem gzip_fs;
307307
};
308308

309-
void GZipFileSystem::VerifyGZIPHeader(uint8_t gzip_hdr[], idx_t read_count) {
309+
void GZipFileSystem::VerifyGZIPHeader(uint8_t gzip_hdr[], idx_t read_count, optional_ptr<CompressedFile> source_file) {
310+
// include the filename in the error message if known
311+
string file_info = source_file ? ": " + source_file->path : "";
312+
310313
// check for incorrectly formatted files
311314
if (read_count != GZIP_HEADER_MINSIZE) {
312-
throw IOException("Input is not a GZIP stream");
315+
throw IOException("Input is not a GZIP stream" + file_info);
313316
}
314317
if (gzip_hdr[0] != 0x1F || gzip_hdr[1] != 0x8B) { // magic header
315-
throw IOException("Input is not a GZIP stream");
318+
throw IOException("Input is not a GZIP stream" + file_info);
316319
}
317320
if (gzip_hdr[2] != GZIP_COMPRESSION_DEFLATE) { // compression method
318-
throw IOException("Unsupported GZIP compression method");
321+
throw IOException("Unsupported GZIP compression method" + file_info);
319322
}
320323
if (gzip_hdr[3] & GZIP_FLAG_UNSUPPORTED) {
321-
throw IOException("Unsupported GZIP archive");
324+
throw IOException("Unsupported GZIP archive" + file_info);
322325
}
323326
}
324327

@@ -360,7 +363,7 @@ string GZipFileSystem::UncompressGZIPString(const char *data, idx_t size) {
360363
}
361364
memcpy(gzip_hdr, body_ptr, GZIP_HEADER_MINSIZE);
362365
body_ptr += GZIP_HEADER_MINSIZE;
363-
GZipFileSystem::VerifyGZIPHeader(gzip_hdr, GZIP_HEADER_MINSIZE);
366+
GZipFileSystem::VerifyGZIPHeader(gzip_hdr, GZIP_HEADER_MINSIZE, nullptr);
364367

365368
if (gzip_hdr[3] & GZIP_FLAG_EXTRA) {
366369
throw IOException("Extra field in a GZIP stream unsupported");

src/duckdb/src/execution/column_binding_resolver.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
#include "duckdb/execution/column_binding_resolver.hpp"
22

33
#include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp"
4-
#include "duckdb/common/to_string.hpp"
54
#include "duckdb/planner/expression/bound_columnref_expression.hpp"
65
#include "duckdb/planner/expression/bound_reference_expression.hpp"
76
#include "duckdb/planner/operator/logical_any_join.hpp"
@@ -27,6 +26,13 @@ void ColumnBindingResolver::VisitOperator(LogicalOperator &op) {
2726
for (auto &cond : comp_join.conditions) {
2827
VisitExpression(&cond.left);
2928
}
29+
// resolve any single-side predicates
30+
// for now, only ASOF supports this, and we are guaranteed that all right side predicates
31+
// have been pushed into a filter.
32+
if (comp_join.predicate) {
33+
D_ASSERT(op.type == LogicalOperatorType::LOGICAL_ASOF_JOIN);
34+
VisitExpression(&comp_join.predicate);
35+
}
3036
// visit the duplicate eliminated columns on the LHS, if any
3137
for (auto &expr : comp_join.duplicate_eliminated_columns) {
3238
VisitExpression(&expr);

src/duckdb/src/execution/operator/join/physical_asof_join.cpp

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ namespace duckdb {
1515
PhysicalAsOfJoin::PhysicalAsOfJoin(LogicalComparisonJoin &op, PhysicalOperator &left, PhysicalOperator &right)
1616
: PhysicalComparisonJoin(op, PhysicalOperatorType::ASOF_JOIN, std::move(op.conditions), op.join_type,
1717
op.estimated_cardinality),
18-
comparison_type(ExpressionType::INVALID) {
18+
comparison_type(ExpressionType::INVALID), predicate(std::move(op.predicate)) {
1919

2020
// Convert the conditions partitions and sorts
2121
for (auto &cond : conditions) {
@@ -380,14 +380,18 @@ class AsOfProbeBuffer {
380380
DataChunk rhs_payload;
381381
idx_t right_group = 0;
382382

383+
// Predicate evaluation
384+
SelectionVector filter_sel;
385+
ExpressionExecutor filterer;
386+
383387
idx_t lhs_match_count;
384388
bool fetch_next_left;
385389
};
386390

387391
AsOfProbeBuffer::AsOfProbeBuffer(ClientContext &context, const PhysicalAsOfJoin &op)
388392
: context(context), allocator(Allocator::Get(context)), op(op),
389393
buffer_manager(BufferManager::GetBufferManager(context)), force_external(IsExternal(context)),
390-
memory_per_thread(op.GetMaxThreadMemory(context)), left_outer(IsLeftOuterJoin(op.join_type)),
394+
memory_per_thread(op.GetMaxThreadMemory(context)), left_outer(IsLeftOuterJoin(op.join_type)), filterer(context),
391395
fetch_next_left(true) {
392396
vector<unique_ptr<BaseStatistics>> partition_stats;
393397
Orders partitions; // Not used.
@@ -400,6 +404,11 @@ AsOfProbeBuffer::AsOfProbeBuffer(ClientContext &context, const PhysicalAsOfJoin
400404

401405
lhs_sel.Initialize();
402406
left_outer.Initialize(STANDARD_VECTOR_SIZE);
407+
408+
if (op.predicate) {
409+
filter_sel.Initialize();
410+
filterer.AddExpression(*op.predicate);
411+
}
403412
}
404413

405414
void AsOfProbeBuffer::BeginLeftScan(hash_t scan_bin) {
@@ -496,7 +505,6 @@ void AsOfProbeBuffer::EndLeftScan() {
496505
void AsOfProbeBuffer::ResolveJoin(bool *found_match, idx_t *matches) {
497506
// If there was no right partition, there are no matches
498507
lhs_match_count = 0;
499-
left_outer.Reset();
500508
if (!right_itr) {
501509
return;
502510
}
@@ -549,8 +557,6 @@ void AsOfProbeBuffer::ResolveJoin(bool *found_match, idx_t *matches) {
549557
}
550558

551559
// Emit match data
552-
right_outer->SetMatch(first);
553-
left_outer.SetMatch(i);
554560
if (found_match) {
555561
found_match[i] = true;
556562
}
@@ -612,6 +618,21 @@ void AsOfProbeBuffer::ResolveComplexJoin(ExecutionContext &context, DataChunk &c
612618
chunk.data[i].Slice(lhs_payload.data[i], lhs_sel, lhs_match_count);
613619
}
614620
chunk.SetCardinality(lhs_match_count);
621+
auto match_sel = &lhs_sel;
622+
if (filterer.expressions.size() == 1) {
623+
lhs_match_count = filterer.SelectExpression(chunk, filter_sel);
624+
chunk.Slice(filter_sel, lhs_match_count);
625+
match_sel = &filter_sel;
626+
}
627+
628+
// Update the match masks for the rows we ended up with
629+
left_outer.Reset();
630+
for (idx_t i = 0; i < lhs_match_count; ++i) {
631+
const auto idx = match_sel->get_index(i);
632+
left_outer.SetMatch(idx);
633+
const auto first = matches[idx];
634+
right_outer->SetMatch(first);
635+
}
615636

616637
// If we are doing a left join, come back for the NULLs
617638
fetch_next_left = !left_outer.Enabled();

src/duckdb/src/execution/physical_plan/plan_asof_join.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,11 @@ PhysicalPlanGenerator::PlanAsOfLoopJoin(LogicalComparisonJoin &op, PhysicalOpera
4242
const auto &probe_types = op.children[0]->types;
4343
join_op.types.insert(join_op.types.end(), probe_types.begin(), probe_types.end());
4444

45+
// TODO: We can't handle predicates right now because we would have to remap column references.
46+
if (op.predicate) {
47+
return nullptr;
48+
}
49+
4550
// Fill in the projection maps to simplify the code below
4651
// Since NLJ doesn't support projection, but ASOF does,
4752
// we have to track this carefully...

src/duckdb/src/function/table/version/pragma_version.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
#ifndef DUCKDB_PATCH_VERSION
2-
#define DUCKDB_PATCH_VERSION "0-dev2620"
2+
#define DUCKDB_PATCH_VERSION "0-dev2638"
33
#endif
44
#ifndef DUCKDB_MINOR_VERSION
55
#define DUCKDB_MINOR_VERSION 3
@@ -8,10 +8,10 @@
88
#define DUCKDB_MAJOR_VERSION 1
99
#endif
1010
#ifndef DUCKDB_VERSION
11-
#define DUCKDB_VERSION "v1.3.0-dev2620"
11+
#define DUCKDB_VERSION "v1.3.0-dev2638"
1212
#endif
1313
#ifndef DUCKDB_SOURCE_ID
14-
#define DUCKDB_SOURCE_ID "92a5b7b32f"
14+
#define DUCKDB_SOURCE_ID "8630414388"
1515
#endif
1616
#include "duckdb/function/table/system_functions.hpp"
1717
#include "duckdb/main/database.hpp"

src/duckdb/src/include/duckdb/common/gzip_file_system.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ class GZipFileSystem : public CompressedFileSystem {
2424
}
2525

2626
//! Verifies that a buffer contains a valid GZIP header
27-
static void VerifyGZIPHeader(uint8_t gzip_hdr[], idx_t read_count);
27+
static void VerifyGZIPHeader(uint8_t gzip_hdr[], idx_t read_count, optional_ptr<CompressedFile> source_file);
2828
static bool CheckIsZip(const char *length, idx_t size);
2929

3030
//! Consumes a byte stream as a gzip string, returning the decompressed string

src/duckdb/src/include/duckdb/execution/operator/join/physical_asof_join.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,9 @@ class PhysicalAsOfJoin : public PhysicalComparisonJoin {
3636
// Projection mappings
3737
vector<column_t> right_projection_map;
3838

39+
// Predicate (join conditions that don't reference both sides)
40+
unique_ptr<Expression> predicate;
41+
3942
public:
4043
// Operator Interface
4144
unique_ptr<GlobalOperatorState> GetGlobalOperatorState(ClientContext &context) const override;

src/duckdb/src/include/duckdb/main/extension_entries.hpp

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1020,6 +1020,12 @@ static constexpr ExtensionEntry EXTENSION_SETTINGS[] = {
10201020
{"unsafe_enable_version_guessing", "iceberg"},
10211021
}; // END_OF_EXTENSION_SETTINGS
10221022

1023+
static constexpr ExtensionEntry EXTENSION_SECRET_TYPES[] = {
1024+
{"aws", "httpfs"}, {"azure", "azure"}, {"gcs", "httpfs"},
1025+
{"huggingface", "httpfs"}, {"mysql", "mysql_scanner"}, {"postgres", "postgres_scanner"},
1026+
{"r2", "httpfs"}, {"s3", "httpfs"},
1027+
}; // END_OF_EXTENSION_SECRET_TYPES
1028+
10231029
// Note: these are currently hardcoded in scripts/generate_extensions_function.py
10241030
// TODO: automate by passing though to script via duckdb
10251031
static constexpr ExtensionEntry EXTENSION_COPY_FUNCTIONS[] = {{"parquet", "parquet"},
@@ -1077,14 +1083,6 @@ static constexpr ExtensionEntry EXTENSION_FILE_CONTAINS[] = {{".parquet?", "parq
10771083
{".ndjson?", ".jsonl?"},
10781084
{".jsonl?", ".ndjson?"}}; // EXTENSION_FILE_CONTAINS
10791085

1080-
// Note: these are currently hardcoded in scripts/generate_extensions_function.py
1081-
// TODO: automate by passing though to script via duckdb
1082-
static constexpr ExtensionEntry EXTENSION_SECRET_TYPES[] = {
1083-
{"s3", "httpfs"}, {"r2", "httpfs"},
1084-
{"gcs", "httpfs"}, {"azure", "azure"},
1085-
{"huggingface", "httpfs"}, {"bearer", "httpfs"},
1086-
{"mysql", "mysql_scanner"}, {"postgres", "postgres_scanner"}}; // EXTENSION_SECRET_TYPES
1087-
10881086
// Note: these are currently hardcoded in scripts/generate_extensions_function.py
10891087
// TODO: automate by passing though to script via duckdb
10901088
static constexpr ExtensionEntry EXTENSION_SECRET_PROVIDERS[] = {

src/duckdb/src/include/duckdb/planner/operator/logical_comparison_join.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@ class LogicalComparisonJoin : public LogicalJoin {
3838
bool convert_mark_to_semi = true;
3939
//! Scans where we should push generated filters into (if any)
4040
unique_ptr<JoinFilterPushdownInfo> filter_pushdown;
41+
//! Filtering predicate from the ON clause with expressions that don't reference both sides
42+
unique_ptr<Expression> predicate;
4143

4244
public:
4345
InsertionOrderPreservingMap<string> ParamsToString() const override;

0 commit comments

Comments
 (0)