vendor: Update vendored sources to duckdb/duckdb@cb05b0d

krlmlr · krlmlr · commit a30cfa0fdc11 · 2025-02-02T12:14:02.000+01:00
Varint to varchar optimization (duckdb/duckdb#15521); Deploy bundled static libraries for OSX arm64 and amd64 (duckdb/duckdb#15682)
diff --git a/src/duckdb/src/common/types/varint.cpp b/src/duckdb/src/common/types/varint.cpp
@@ -1,5 +1,7 @@
 #include "duckdb/common/types/varint.hpp"
 #include "duckdb/common/exception/conversion_exception.hpp"
+#include "duckdb/common/numeric_utils.hpp"
+#include "duckdb/common/typedefs.hpp"
 #include <cmath>
 
 namespace duckdb {
@@ -159,10 +161,13 @@ void Varint::GetByteArray(vector<uint8_t> &byte_array, bool &is_negative, const
 
 	// Determine if the number is negative
 	is_negative = (blob_ptr[0] & 0x80) == 0;
-	for (idx_t i = 3; i < blob.GetSize(); i++) {
-		if (is_negative) {
+	byte_array.reserve(blob.GetSize() - 3);
+	if (is_negative) {
+		for (idx_t i = 3; i < blob.GetSize(); i++) {
 			byte_array.push_back(static_cast<uint8_t>(~blob_ptr[i]));
-		} else {
+		}
+	} else {
+		for (idx_t i = 3; i < blob.GetSize(); i++) {
 			byte_array.push_back(static_cast<uint8_t>(blob_ptr[i]));
 		}
 	}
@@ -184,28 +189,53 @@ string Varint::FromByteArray(uint8_t *data, idx_t size, bool is_negative) {
 	return result;
 }
 
+// Following CPython and Knuth (TAOCP, Volume 2 (3rd edn), section 4.4, Method 1b).
 string Varint::VarIntToVarchar(const string_t &blob) {
 	string decimal_string;
 	vector<uint8_t> byte_array;
 	bool is_negative;
 	GetByteArray(byte_array, is_negative, blob);
-	while (!byte_array.empty()) {
-		string quotient;
-		uint8_t remainder = 0;
-		for (uint8_t byte : byte_array) {
-			int new_value = remainder * 256 + byte;
-			quotient += DigitToChar(new_value / 10);
-			remainder = static_cast<uint8_t>(new_value % 10);
+	vector<digit_t> digits;
+	// Rounding byte_array to digit_bytes multiple size, so that we can process every digit_bytes bytes
+	// at a time without if check in the for loop
+	idx_t padding_size = (-byte_array.size()) & (DIGIT_BYTES - 1);
+	byte_array.insert(byte_array.begin(), padding_size, 0);
+	for (idx_t i = 0; i < byte_array.size(); i += DIGIT_BYTES) {
+		digit_t hi = 0;
+		for (idx_t j = 0; j < DIGIT_BYTES; j++) {
+			hi |= UnsafeNumericCast<digit_t>(byte_array[i + j]) << (8 * (DIGIT_BYTES - j - 1));
 		}
-		decimal_string += DigitToChar(remainder);
-		// Remove leading zeros from the quotient
-		byte_array.clear();
-		for (char digit : quotient) {
-			if (digit != '0' || !byte_array.empty()) {
-				byte_array.push_back(static_cast<uint8_t>(CharToDigit(digit)));
-			}
+
+		for (idx_t j = 0; j < digits.size(); j++) {
+			twodigit_t tmp = UnsafeNumericCast<twodigit_t>(digits[j]) << DIGIT_BITS | hi;
+			hi = static_cast<digit_t>(tmp / UnsafeNumericCast<twodigit_t>(DECIMAL_BASE));
+			digits[j] = static_cast<digit_t>(tmp - UnsafeNumericCast<twodigit_t>(DECIMAL_BASE * hi));
+		}
+
+		while (hi) {
+			digits.push_back(hi % DECIMAL_BASE);
+			hi /= DECIMAL_BASE;
+		}
+	}
+
+	if (digits.empty()) {
+		digits.push_back(0);
+	}
+
+	for (idx_t i = 0; i < digits.size() - 1; i++) {
+		auto remain = digits[i];
+		for (idx_t j = 0; j < DECIMAL_SHIFT; j++) {
+			decimal_string += DigitToChar(static_cast<int>(remain % 10));
+			remain /= 10;
 		}
 	}
+
+	auto remain = digits.back();
+	do {
+		decimal_string += DigitToChar(static_cast<int>(remain % 10));
+		remain /= 10;
+	} while (remain != 0);
+
 	if (is_negative) {
 		decimal_string += '-';
 	}
diff --git a/src/duckdb/src/function/table/version/pragma_version.cpp b/src/duckdb/src/function/table/version/pragma_version.cpp
@@ -1,5 +1,5 @@
 #ifndef DUCKDB_PATCH_VERSION
-#define DUCKDB_PATCH_VERSION "4-dev4496"
+#define DUCKDB_PATCH_VERSION "4-dev4508"
 #endif
 #ifndef DUCKDB_MINOR_VERSION
 #define DUCKDB_MINOR_VERSION 1
@@ -8,10 +8,10 @@
 #define DUCKDB_MAJOR_VERSION 1
 #endif
 #ifndef DUCKDB_VERSION
-#define DUCKDB_VERSION "v1.1.4-dev4496"
+#define DUCKDB_VERSION "v1.1.4-dev4508"
 #endif
 #ifndef DUCKDB_SOURCE_ID
-#define DUCKDB_SOURCE_ID "20ae352efc"
+#define DUCKDB_SOURCE_ID "cb05b0d2f1"
 #endif
 #include "duckdb/function/table/system_functions.hpp"
 #include "duckdb/main/database.hpp"
diff --git a/src/duckdb/src/include/duckdb/common/types/varint.hpp b/src/duckdb/src/include/duckdb/common/types/varint.hpp
@@ -16,11 +16,22 @@
 #include "duckdb/function/cast/default_casts.hpp"
 
 namespace duckdb {
+using digit_t = uint32_t;
+using twodigit_t = uint64_t;
+
 //! The Varint class is a static class that holds helper functions for the Varint type.
 class Varint {
 public:
 	//! Header size of a Varint is always 3 bytes.
 	DUCKDB_API static constexpr uint8_t VARINT_HEADER_SIZE = 3;
+	//! Max(e such that 10**e fits in a digit_t)
+	DUCKDB_API static constexpr uint8_t DECIMAL_SHIFT = 9;
+	//! 10 ** DECIMAL_SHIFT
+	DUCKDB_API static constexpr digit_t DECIMAL_BASE = 1000000000;
+	//! Bytes of a digit_t
+	DUCKDB_API static constexpr uint8_t DIGIT_BYTES = sizeof(digit_t);
+	//! Bits of a digit_t
+	DUCKDB_API static constexpr uint8_t DIGIT_BITS = DIGIT_BYTES * 8;
 	//! Verifies if a Varint is valid. i.e., if it has 3 header bytes. The header correctly represents the number of
 	//! data bytes, and the data bytes has no leading zero bytes.
 	DUCKDB_API static void Verify(const string_t &input);