duckdb
diff --git a/‎src/duckdb/extension/core_functions/aggregate/holistic/approximate_quantile.cpp‎
Lines changed: 49 additions & 12 deletions b/‎src/duckdb/extension/core_functions/aggregate/holistic/approximate_quantile.cpp‎
Lines changed: 49 additions & 12 deletions
diff --git a/‎src/duckdb/src/common/gzip_file_system.cpp‎
Lines changed: 11 additions & 8 deletions b/‎src/duckdb/src/common/gzip_file_system.cpp‎
Lines changed: 11 additions & 8 deletions
diff --git a/‎src/duckdb/src/execution/column_binding_resolver.cpp‎
Lines changed: 7 additions & 1 deletion b/‎src/duckdb/src/execution/column_binding_resolver.cpp‎
Lines changed: 7 additions & 1 deletion
diff --git a/‎src/duckdb/src/execution/operator/join/physical_asof_join.cpp‎
Lines changed: 26 additions & 5 deletions b/‎src/duckdb/src/execution/operator/join/physical_asof_join.cpp‎
Lines changed: 26 additions & 5 deletions
diff --git a/‎src/duckdb/src/execution/physical_plan/plan_asof_join.cpp‎
Lines changed: 5 additions & 0 deletions b/‎src/duckdb/src/execution/physical_plan/plan_asof_join.cpp‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎src/duckdb/src/function/table/version/pragma_version.cpp‎
Lines changed: 3 additions & 3 deletions b/‎src/duckdb/src/function/table/version/pragma_version.cpp‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎src/duckdb/src/include/duckdb/common/gzip_file_system.hpp‎
Lines changed: 1 addition & 1 deletion b/‎src/duckdb/src/include/duckdb/common/gzip_file_system.hpp‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/duckdb/src/include/duckdb/execution/operator/join/physical_asof_join.hpp‎
Lines changed: 3 additions & 0 deletions b/‎src/duckdb/src/include/duckdb/execution/operator/join/physical_asof_join.hpp‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎src/duckdb/src/include/duckdb/main/extension_entries.hpp‎
Lines changed: 6 additions & 8 deletions b/‎src/duckdb/src/include/duckdb/main/extension_entries.hpp‎
Lines changed: 6 additions & 8 deletions
diff --git a/‎src/duckdb/src/include/duckdb/planner/operator/logical_comparison_join.hpp‎
Lines changed: 2 additions & 0 deletions b/‎src/duckdb/src/include/duckdb/planner/operator/logical_comparison_join.hpp‎
Lines changed: 2 additions & 0 deletions
@@ -6,8 +6,6 @@
 #include "duckdb/common/serializer/serializer.hpp"
 #include "duckdb/common/serializer/deserializer.hpp"
 
-#include <algorithm>
-#include <cmath>
 #include <stdlib.h>
 
 namespace duckdb {
@@ -17,6 +15,50 @@ struct ApproxQuantileState {
 	idx_t pos;
 };
 
+struct ApproxQuantileCoding { // Conversions between the aggregate's input/result types and the SAVE_TYPE kept in the state
+	template <typename INPUT_TYPE, typename SAVE_TYPE>
+	static SAVE_TYPE Encode(const INPUT_TYPE &input) { // generic case: a plain Cast from INPUT_TYPE to SAVE_TYPE
+		return Cast::template Operation<INPUT_TYPE, SAVE_TYPE>(input);
+	}
+
+	template <typename SAVE_TYPE, typename TARGET_TYPE>
+	static bool Decode(const SAVE_TYPE &source, TARGET_TYPE &target) { // returns true iff the TryCast succeeded (target was not clamped)
+		// The result is approximate, so clamp instead of overflowing.
+		if (TryCast::Operation(source, target, false)) {
+			return true;
+		} else if (source < 0) { // failed cast of a negative source: clamp to the TARGET_TYPE minimum
+			target = NumericLimits<TARGET_TYPE>::Minimum();
+		} else { // every other failed cast: clamp to the TARGET_TYPE maximum
+			target = NumericLimits<TARGET_TYPE>::Maximum();
+		}
+		return false;
+	}
+};
+
+template <>
+double ApproxQuantileCoding::Encode(const dtime_tz_t &input) { // dtime_tz_t specialization: encodes the value's sort key
+	return Encode<uint64_t, double>(input.sort_key()); // delegates to the generic Encode on the uint64_t sort key
+}
+
+template <>
+bool ApproxQuantileCoding::Decode(const double &source, dtime_tz_t &target) { // dtime_tz_t specialization: rebuilds a value from a sort key stored as double
+	uint64_t sort_key;
+	const auto decoded = Decode<double, uint64_t>(source, sort_key); // false when the TryCast to uint64_t fails
+	if (decoded) {
+		//	We can invert the sort key because its offset was not touched.
+		auto offset = dtime_tz_t::decode_offset(sort_key);
+		auto micros = dtime_tz_t::decode_micros(sort_key);
+		micros -= int64_t(dtime_tz_t::encode_offset(offset) * dtime_tz_t::OFFSET_MICROS); // NOTE(review): presumably undoes the offset shift applied by sort_key() - confirm
+		target = dtime_tz_t(dtime_t(micros), offset);
+	} else if (source < 0) { // cast failed on a negative source: clamp to the TIME_TZ minimum
+		target = Value::MinimumValue(LogicalTypeId::TIME_TZ).GetValue<dtime_tz_t>();
+	} else { // cast failed otherwise: clamp to the TIME_TZ maximum
+		target = Value::MaximumValue(LogicalTypeId::TIME_TZ).GetValue<dtime_tz_t>();
+	}
+
+	return decoded;
+}
+
 struct ApproximateQuantileBindData : public FunctionData {
 	ApproximateQuantileBindData() {
 	}
@@ -73,7 +115,7 @@ struct ApproxQuantileOperation {
 
 	template <class INPUT_TYPE, class STATE, class OP>
 	static void Operation(STATE &state, const INPUT_TYPE &input, AggregateUnaryInput &unary_input) {
-		auto val = Cast::template Operation<INPUT_TYPE, SAVE_TYPE>(input);
+		auto val = ApproxQuantileCoding::template Encode<INPUT_TYPE, SAVE_TYPE>(input);
 		if (!Value::DoubleIsFinite(val)) {
 			return;
 		}
@@ -121,15 +163,8 @@ struct ApproxQuantileScalarOperation : public ApproxQuantileOperation {
 		state.h->compress();
 		auto &bind_data = finalize_data.input.bind_data->template Cast<ApproximateQuantileBindData>();
 		D_ASSERT(bind_data.quantiles.size() == 1);
-		// The result is approximate, so clamp instead of overflowing.
 		const auto source = state.h->quantile(bind_data.quantiles[0]);
-		if (TryCast::Operation(source, target, false)) {
-			return;
-		} else if (source < 0) {
-			target = NumericLimits<TARGET_TYPE>::Minimum();
-		} else {
-			target = NumericLimits<TARGET_TYPE>::Maximum();
-		}
+		ApproxQuantileCoding::Decode(source, target);
 	}
 };
 
@@ -281,7 +316,9 @@ struct ApproxQuantileListOperation : public ApproxQuantileOperation {
 		entry.length = bind_data.quantiles.size();
 		for (size_t q = 0; q < entry.length; ++q) {
 			const auto &quantile = bind_data.quantiles[q];
-			rdata[ridx + q] = Cast::template Operation<SAVE_TYPE, CHILD_TYPE>(state.h->quantile(quantile));
+			const auto &source = state.h->quantile(quantile);
+			auto &target = rdata[ridx + q];
+			ApproxQuantileCoding::Decode(source, target);
 		}
 
 		ListVector::SetListSize(finalize_data.result, entry.offset + entry.length);
 
@@ -120,7 +120,7 @@ void MiniZStreamWrapper::Initialize(CompressedFile &file, bool write) {
 	} else {
 		idx_t data_start = GZIP_HEADER_MINSIZE;
 		auto read_count = file.child_handle->Read(gzip_hdr, GZIP_HEADER_MINSIZE);
-		GZipFileSystem::VerifyGZIPHeader(gzip_hdr, NumericCast<idx_t>(read_count));
+		GZipFileSystem::VerifyGZIPHeader(gzip_hdr, NumericCast<idx_t>(read_count), &file);
 		// Skip over the extra field if necessary
 		if (gzip_hdr[3] & GZIP_FLAG_EXTRA) {
 			uint8_t gzip_xlen[2];
@@ -157,7 +157,7 @@ bool MiniZStreamWrapper::Read(StreamData &sd) {
 		auto body_ptr = sd.in_buff_start + GZIP_FOOTER_SIZE;
 		uint8_t gzip_hdr[GZIP_HEADER_MINSIZE];
 		memcpy(gzip_hdr, body_ptr, GZIP_HEADER_MINSIZE);
-		GZipFileSystem::VerifyGZIPHeader(gzip_hdr, GZIP_HEADER_MINSIZE);
+		GZipFileSystem::VerifyGZIPHeader(gzip_hdr, GZIP_HEADER_MINSIZE, nullptr);
 		body_ptr += GZIP_HEADER_MINSIZE;
 		if (gzip_hdr[3] & GZIP_FLAG_EXTRA) {
 			auto xlen = NumericCast<idx_t>((uint8_t)*body_ptr | (uint8_t) * (body_ptr + 1) << 8);
@@ -306,19 +306,22 @@ class GZipFile : public CompressedFile {
 	GZipFileSystem gzip_fs;
 };
 
-void GZipFileSystem::VerifyGZIPHeader(uint8_t gzip_hdr[], idx_t read_count) {
+void GZipFileSystem::VerifyGZIPHeader(uint8_t gzip_hdr[], idx_t read_count, optional_ptr<CompressedFile> source_file) {
+	// include the filename in the error message if known (source_file may be null; the suffix is then empty)
+	string file_info = source_file ? ": " + source_file->path : ""; // appended to every IOException message below
+
 	// check for incorrectly formatted files
 	if (read_count != GZIP_HEADER_MINSIZE) {
-		throw IOException("Input is not a GZIP stream");
+		throw IOException("Input is not a GZIP stream" + file_info);
 	}
 	if (gzip_hdr[0] != 0x1F || gzip_hdr[1] != 0x8B) { // magic header
-		throw IOException("Input is not a GZIP stream");
+		throw IOException("Input is not a GZIP stream" + file_info);
 	}
 	if (gzip_hdr[2] != GZIP_COMPRESSION_DEFLATE) { // compression method
-		throw IOException("Unsupported GZIP compression method");
+		throw IOException("Unsupported GZIP compression method" + file_info);
 	}
 	if (gzip_hdr[3] & GZIP_FLAG_UNSUPPORTED) {
-		throw IOException("Unsupported GZIP archive");
+		throw IOException("Unsupported GZIP archive" + file_info); // a flag bit covered by GZIP_FLAG_UNSUPPORTED is set
 	}
 }
 
@@ -360,7 +363,7 @@ string GZipFileSystem::UncompressGZIPString(const char *data, idx_t size) {
 	}
 	memcpy(gzip_hdr, body_ptr, GZIP_HEADER_MINSIZE);
 	body_ptr += GZIP_HEADER_MINSIZE;
-	GZipFileSystem::VerifyGZIPHeader(gzip_hdr, GZIP_HEADER_MINSIZE);
+	GZipFileSystem::VerifyGZIPHeader(gzip_hdr, GZIP_HEADER_MINSIZE, nullptr);
 
 	if (gzip_hdr[3] & GZIP_FLAG_EXTRA) {
 		throw IOException("Extra field in a GZIP stream unsupported");
 
@@ -1,7 +1,6 @@
 #include "duckdb/execution/column_binding_resolver.hpp"
 
 #include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp"
-#include "duckdb/common/to_string.hpp"
 #include "duckdb/planner/expression/bound_columnref_expression.hpp"
 #include "duckdb/planner/expression/bound_reference_expression.hpp"
 #include "duckdb/planner/operator/logical_any_join.hpp"
@@ -27,6 +26,13 @@ void ColumnBindingResolver::VisitOperator(LogicalOperator &op) {
 		for (auto &cond : comp_join.conditions) {
 			VisitExpression(&cond.left);
 		}
+		// resolve any single-side predicates
+		// for now, only ASOF supports this, and we are guaranteed that all right side predicates
+		// have been pushed into a filter.
+		if (comp_join.predicate) {
+			D_ASSERT(op.type == LogicalOperatorType::LOGICAL_ASOF_JOIN);
+			VisitExpression(&comp_join.predicate);
+		}
 		// visit the duplicate eliminated columns on the LHS, if any
 		for (auto &expr : comp_join.duplicate_eliminated_columns) {
 			VisitExpression(&expr);
 
@@ -15,7 +15,7 @@ namespace duckdb {
 PhysicalAsOfJoin::PhysicalAsOfJoin(LogicalComparisonJoin &op, PhysicalOperator &left, PhysicalOperator &right)
     : PhysicalComparisonJoin(op, PhysicalOperatorType::ASOF_JOIN, std::move(op.conditions), op.join_type,
                              op.estimated_cardinality),
-      comparison_type(ExpressionType::INVALID) {
+      comparison_type(ExpressionType::INVALID), predicate(std::move(op.predicate)) {
 
 	// Convert the conditions partitions and sorts
 	for (auto &cond : conditions) {
@@ -380,14 +380,18 @@ class AsOfProbeBuffer {
 	DataChunk rhs_payload;
 	idx_t right_group = 0;
 
+	//	Predicate evaluation
+	SelectionVector filter_sel;
+	ExpressionExecutor filterer;
+
 	idx_t lhs_match_count;
 	bool fetch_next_left;
 };
 
 AsOfProbeBuffer::AsOfProbeBuffer(ClientContext &context, const PhysicalAsOfJoin &op)
     : context(context), allocator(Allocator::Get(context)), op(op),
       buffer_manager(BufferManager::GetBufferManager(context)), force_external(IsExternal(context)),
-      memory_per_thread(op.GetMaxThreadMemory(context)), left_outer(IsLeftOuterJoin(op.join_type)),
+      memory_per_thread(op.GetMaxThreadMemory(context)), left_outer(IsLeftOuterJoin(op.join_type)), filterer(context),
       fetch_next_left(true) {
 	vector<unique_ptr<BaseStatistics>> partition_stats;
 	Orders partitions; // Not used.
@@ -400,6 +404,11 @@ AsOfProbeBuffer::AsOfProbeBuffer(ClientContext &context, const PhysicalAsOfJoin
 
 	lhs_sel.Initialize();
 	left_outer.Initialize(STANDARD_VECTOR_SIZE);
+
+	if (op.predicate) {
+		filter_sel.Initialize();
+		filterer.AddExpression(*op.predicate);
+	}
 }
 
 void AsOfProbeBuffer::BeginLeftScan(hash_t scan_bin) {
@@ -496,7 +505,6 @@ void AsOfProbeBuffer::EndLeftScan() {
 void AsOfProbeBuffer::ResolveJoin(bool *found_match, idx_t *matches) {
 	// If there was no right partition, there are no matches
 	lhs_match_count = 0;
-	left_outer.Reset();
 	if (!right_itr) {
 		return;
 	}
@@ -549,8 +557,6 @@ void AsOfProbeBuffer::ResolveJoin(bool *found_match, idx_t *matches) {
 		}
 
 		// Emit match data
-		right_outer->SetMatch(first);
-		left_outer.SetMatch(i);
 		if (found_match) {
 			found_match[i] = true;
 		}
@@ -612,6 +618,21 @@ void AsOfProbeBuffer::ResolveComplexJoin(ExecutionContext &context, DataChunk &c
 		chunk.data[i].Slice(lhs_payload.data[i], lhs_sel, lhs_match_count);
 	}
 	chunk.SetCardinality(lhs_match_count);
+	auto match_sel = &lhs_sel;
+	if (filterer.expressions.size() == 1) {
+		lhs_match_count = filterer.SelectExpression(chunk, filter_sel);
+		chunk.Slice(filter_sel, lhs_match_count);
+		match_sel = &filter_sel;
+	}
+
+	//	Update the match masks for the rows we ended up with
+	left_outer.Reset();
+	for (idx_t i = 0; i < lhs_match_count; ++i) {
+		const auto idx = match_sel->get_index(i);
+		left_outer.SetMatch(idx);
+		const auto first = matches[idx];
+		right_outer->SetMatch(first);
+	}
 
 	//	If we are doing a left join, come back for the NULLs
 	fetch_next_left = !left_outer.Enabled();
 
@@ -42,6 +42,11 @@ PhysicalPlanGenerator::PlanAsOfLoopJoin(LogicalComparisonJoin &op, PhysicalOpera
 	const auto &probe_types = op.children[0]->types;
 	join_op.types.insert(join_op.types.end(), probe_types.begin(), probe_types.end());
 
+	// TODO: We can't handle predicates right now because we would have to remap column references.
+	if (op.predicate) {
+		return nullptr;
+	}
+
 	//	Fill in the projection maps to simplify the code below
 	//	Since NLJ doesn't support projection, but ASOF does,
 	//	we have to track this carefully...
 
@@ -1,5 +1,5 @@
 #ifndef DUCKDB_PATCH_VERSION
-#define DUCKDB_PATCH_VERSION "0-dev2620"
+#define DUCKDB_PATCH_VERSION "0-dev2638"
 #endif
 #ifndef DUCKDB_MINOR_VERSION
 #define DUCKDB_MINOR_VERSION 3
@@ -8,10 +8,10 @@
 #define DUCKDB_MAJOR_VERSION 1
 #endif
 #ifndef DUCKDB_VERSION
-#define DUCKDB_VERSION "v1.3.0-dev2620"
+#define DUCKDB_VERSION "v1.3.0-dev2638"
 #endif
 #ifndef DUCKDB_SOURCE_ID
-#define DUCKDB_SOURCE_ID "92a5b7b32f"
+#define DUCKDB_SOURCE_ID "8630414388"
 #endif
 #include "duckdb/function/table/system_functions.hpp"
 #include "duckdb/main/database.hpp"
 
@@ -24,7 +24,7 @@ class GZipFileSystem : public CompressedFileSystem {
 	}
 
 	//! Verifies that a buffer contains a valid GZIP header
-	static void VerifyGZIPHeader(uint8_t gzip_hdr[], idx_t read_count);
+	static void VerifyGZIPHeader(uint8_t gzip_hdr[], idx_t read_count, optional_ptr<CompressedFile> source_file);
 	static bool CheckIsZip(const char *length, idx_t size);
 
 	//! Consumes a byte stream as a gzip string, returning the decompressed string
 
@@ -36,6 +36,9 @@ class PhysicalAsOfJoin : public PhysicalComparisonJoin {
 	// Projection mappings
 	vector<column_t> right_projection_map;
 
+	// Predicate (join conditions that don't reference both sides)
+	unique_ptr<Expression> predicate;
+
 public:
 	// Operator Interface
 	unique_ptr<GlobalOperatorState> GetGlobalOperatorState(ClientContext &context) const override;
 
@@ -1020,6 +1020,12 @@ static constexpr ExtensionEntry EXTENSION_SETTINGS[] = {
     {"unsafe_enable_version_guessing", "iceberg"},
 }; // END_OF_EXTENSION_SETTINGS
 
+static constexpr ExtensionEntry EXTENSION_SECRET_TYPES[] = { // NOTE(review): presumably {secret type, extension that provides it} - confirm
+    {"aws", "httpfs"},         {"azure", "azure"},         {"gcs", "httpfs"},
+    {"huggingface", "httpfs"}, {"mysql", "mysql_scanner"}, {"postgres", "postgres_scanner"},
+    {"r2", "httpfs"},          {"s3", "httpfs"},
+}; // END_OF_EXTENSION_SECRET_TYPES
+
 // Note: these are currently hardcoded in scripts/generate_extensions_function.py
 // TODO: automate by passing though to script via duckdb
 static constexpr ExtensionEntry EXTENSION_COPY_FUNCTIONS[] = {{"parquet", "parquet"},
@@ -1077,14 +1083,6 @@ static constexpr ExtensionEntry EXTENSION_FILE_CONTAINS[] = {{".parquet?", "parq
                                                              {".ndjson?", ".jsonl?"},
                                                              {".jsonl?", ".ndjson?"}}; // EXTENSION_FILE_CONTAINS
 
-// Note: these are currently hardcoded in scripts/generate_extensions_function.py
-// TODO: automate by passing though to script via duckdb
-static constexpr ExtensionEntry EXTENSION_SECRET_TYPES[] = {
-    {"s3", "httpfs"},           {"r2", "httpfs"},
-    {"gcs", "httpfs"},          {"azure", "azure"},
-    {"huggingface", "httpfs"},  {"bearer", "httpfs"},
-    {"mysql", "mysql_scanner"}, {"postgres", "postgres_scanner"}}; // EXTENSION_SECRET_TYPES
-
 // Note: these are currently hardcoded in scripts/generate_extensions_function.py
 // TODO: automate by passing though to script via duckdb
 static constexpr ExtensionEntry EXTENSION_SECRET_PROVIDERS[] = {
 
@@ -38,6 +38,8 @@ class LogicalComparisonJoin : public LogicalJoin {
 	bool convert_mark_to_semi = true;
 	//! Scans where we should push generated filters into (if any)
 	unique_ptr<JoinFilterPushdownInfo> filter_pushdown;
+	//! Filtering predicate from the ON clause with expressions that don't reference both sides
+	unique_ptr<Expression> predicate;
 
 public:
 	InsertionOrderPreservingMap<string> ParamsToString() const override;
Original file line number	Diff line number	Diff line change
`@@ -24,7 +24,7 @@ class GZipFileSystem : public CompressedFileSystem {`
`24`	`24`	`}`
`25`	`25`
`26`	`26`	`//! Verifies that a buffer contains a valid GZIP header`
`27`		`- static void VerifyGZIPHeader(uint8_t gzip_hdr[], idx_t read_count);`
	`27`	`+ static void VerifyGZIPHeader(uint8_t gzip_hdr[], idx_t read_count, optional_ptr<CompressedFile> source_file);`
`28`	`28`	`static bool CheckIsZip(const char *length, idx_t size);`
`29`	`29`
`30`	`30`	`//! Consumes a byte stream as a gzip string, returning the decompressed string`