Skip to content

Commit a30cfa0

Browse files
committed
vendor: Update vendored sources to duckdb/duckdb@cb05b0d
Varint to varchar optimization (duckdb/duckdb#15521)
Deploy bundled static libraries for OSX arm64 and amd64 (duckdb/duckdb#15682)
1 parent bbf044a commit a30cfa0

File tree

3 files changed

+61
-20
lines changed

3 files changed

+61
-20
lines changed

src/duckdb/src/common/types/varint.cpp

Lines changed: 47 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
#include "duckdb/common/types/varint.hpp"
22
#include "duckdb/common/exception/conversion_exception.hpp"
3+
#include "duckdb/common/numeric_utils.hpp"
4+
#include "duckdb/common/typedefs.hpp"
35
#include <cmath>
46

57
namespace duckdb {
@@ -159,10 +161,13 @@ void Varint::GetByteArray(vector<uint8_t> &byte_array, bool &is_negative, const
159161

160162
// Determine if the number is negative
161163
is_negative = (blob_ptr[0] & 0x80) == 0;
162-
for (idx_t i = 3; i < blob.GetSize(); i++) {
163-
if (is_negative) {
164+
byte_array.reserve(blob.GetSize() - 3);
165+
if (is_negative) {
166+
for (idx_t i = 3; i < blob.GetSize(); i++) {
164167
byte_array.push_back(static_cast<uint8_t>(~blob_ptr[i]));
165-
} else {
168+
}
169+
} else {
170+
for (idx_t i = 3; i < blob.GetSize(); i++) {
166171
byte_array.push_back(static_cast<uint8_t>(blob_ptr[i]));
167172
}
168173
}
@@ -184,28 +189,53 @@ string Varint::FromByteArray(uint8_t *data, idx_t size, bool is_negative) {
184189
return result;
185190
}
186191

192+
// Following CPython and Knuth (TAOCP, Volume 2 (3rd edn), section 4.4, Method 1b).
187193
string Varint::VarIntToVarchar(const string_t &blob) {
188194
string decimal_string;
189195
vector<uint8_t> byte_array;
190196
bool is_negative;
191197
GetByteArray(byte_array, is_negative, blob);
192-
while (!byte_array.empty()) {
193-
string quotient;
194-
uint8_t remainder = 0;
195-
for (uint8_t byte : byte_array) {
196-
int new_value = remainder * 256 + byte;
197-
quotient += DigitToChar(new_value / 10);
198-
remainder = static_cast<uint8_t>(new_value % 10);
198+
vector<digit_t> digits;
199+
// Pad byte_array with leading zeros to a multiple of DIGIT_BYTES, so that we can process DIGIT_BYTES bytes
200+
// at a time without an if check in the for loop
201+
idx_t padding_size = (-byte_array.size()) & (DIGIT_BYTES - 1);
202+
byte_array.insert(byte_array.begin(), padding_size, 0);
203+
for (idx_t i = 0; i < byte_array.size(); i += DIGIT_BYTES) {
204+
digit_t hi = 0;
205+
for (idx_t j = 0; j < DIGIT_BYTES; j++) {
206+
hi |= UnsafeNumericCast<digit_t>(byte_array[i + j]) << (8 * (DIGIT_BYTES - j - 1));
199207
}
200-
decimal_string += DigitToChar(remainder);
201-
// Remove leading zeros from the quotient
202-
byte_array.clear();
203-
for (char digit : quotient) {
204-
if (digit != '0' || !byte_array.empty()) {
205-
byte_array.push_back(static_cast<uint8_t>(CharToDigit(digit)));
206-
}
208+
209+
for (idx_t j = 0; j < digits.size(); j++) {
210+
twodigit_t tmp = UnsafeNumericCast<twodigit_t>(digits[j]) << DIGIT_BITS | hi;
211+
hi = static_cast<digit_t>(tmp / UnsafeNumericCast<twodigit_t>(DECIMAL_BASE));
212+
digits[j] = static_cast<digit_t>(tmp - UnsafeNumericCast<twodigit_t>(DECIMAL_BASE * hi));
213+
}
214+
215+
while (hi) {
216+
digits.push_back(hi % DECIMAL_BASE);
217+
hi /= DECIMAL_BASE;
218+
}
219+
}
220+
221+
if (digits.empty()) {
222+
digits.push_back(0);
223+
}
224+
225+
for (idx_t i = 0; i < digits.size() - 1; i++) {
226+
auto remain = digits[i];
227+
for (idx_t j = 0; j < DECIMAL_SHIFT; j++) {
228+
decimal_string += DigitToChar(static_cast<int>(remain % 10));
229+
remain /= 10;
207230
}
208231
}
232+
233+
auto remain = digits.back();
234+
do {
235+
decimal_string += DigitToChar(static_cast<int>(remain % 10));
236+
remain /= 10;
237+
} while (remain != 0);
238+
209239
if (is_negative) {
210240
decimal_string += '-';
211241
}

src/duckdb/src/function/table/version/pragma_version.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
#ifndef DUCKDB_PATCH_VERSION
2-
#define DUCKDB_PATCH_VERSION "4-dev4496"
2+
#define DUCKDB_PATCH_VERSION "4-dev4508"
33
#endif
44
#ifndef DUCKDB_MINOR_VERSION
55
#define DUCKDB_MINOR_VERSION 1
@@ -8,10 +8,10 @@
88
#define DUCKDB_MAJOR_VERSION 1
99
#endif
1010
#ifndef DUCKDB_VERSION
11-
#define DUCKDB_VERSION "v1.1.4-dev4496"
11+
#define DUCKDB_VERSION "v1.1.4-dev4508"
1212
#endif
1313
#ifndef DUCKDB_SOURCE_ID
14-
#define DUCKDB_SOURCE_ID "20ae352efc"
14+
#define DUCKDB_SOURCE_ID "cb05b0d2f1"
1515
#endif
1616
#include "duckdb/function/table/system_functions.hpp"
1717
#include "duckdb/main/database.hpp"

src/duckdb/src/include/duckdb/common/types/varint.hpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,22 @@
1616
#include "duckdb/function/cast/default_casts.hpp"
1717

1818
namespace duckdb {
19+
using digit_t = uint32_t;
20+
using twodigit_t = uint64_t;
21+
1922
//! The Varint class is a static class that holds helper functions for the Varint type.
2023
class Varint {
2124
public:
2225
//! Header size of a Varint is always 3 bytes.
2326
DUCKDB_API static constexpr uint8_t VARINT_HEADER_SIZE = 3;
27+
//! Max(e such that 10**e fits in a digit_t)
28+
DUCKDB_API static constexpr uint8_t DECIMAL_SHIFT = 9;
29+
//! 10 ** DECIMAL_SHIFT
30+
DUCKDB_API static constexpr digit_t DECIMAL_BASE = 1000000000;
31+
//! Bytes of a digit_t
32+
DUCKDB_API static constexpr uint8_t DIGIT_BYTES = sizeof(digit_t);
33+
//! Bits of a digit_t
34+
DUCKDB_API static constexpr uint8_t DIGIT_BITS = DIGIT_BYTES * 8;
2435
//! Verifies if a Varint is valid, i.e., if it has 3 header bytes, the header correctly represents the number of
2536
//! data bytes, and the data bytes have no leading zero bytes.
2637
DUCKDB_API static void Verify(const string_t &input);

0 commit comments

Comments
 (0)