11#include " duckdb/common/types/varint.hpp"
22#include " duckdb/common/exception/conversion_exception.hpp"
3+ #include " duckdb/common/numeric_utils.hpp"
4+ #include " duckdb/common/typedefs.hpp"
35#include < cmath>
46
57namespace duckdb {
@@ -159,10 +161,13 @@ void Varint::GetByteArray(vector<uint8_t> &byte_array, bool &is_negative, const
159161
160162 // Determine if the number is negative
161163 is_negative = (blob_ptr[0 ] & 0x80 ) == 0 ;
162- for (idx_t i = 3 ; i < blob.GetSize (); i++) {
163- if (is_negative) {
164+ byte_array.reserve (blob.GetSize () - 3 );
165+ if (is_negative) {
166+ for (idx_t i = 3 ; i < blob.GetSize (); i++) {
164167 byte_array.push_back (static_cast <uint8_t >(~blob_ptr[i]));
165- } else {
168+ }
169+ } else {
170+ for (idx_t i = 3 ; i < blob.GetSize (); i++) {
166171 byte_array.push_back (static_cast <uint8_t >(blob_ptr[i]));
167172 }
168173 }
@@ -184,28 +189,53 @@ string Varint::FromByteArray(uint8_t *data, idx_t size, bool is_negative) {
184189 return result;
185190}
186191
192+ // Following CPython and Knuth (TAOCP, Volume 2 (3rd edn), section 4.4, Method 1b).
187193string Varint::VarIntToVarchar (const string_t &blob) {
188194 string decimal_string;
189195 vector<uint8_t > byte_array;
190196 bool is_negative;
191197 GetByteArray (byte_array, is_negative, blob);
192- while (!byte_array.empty ()) {
193- string quotient;
194- uint8_t remainder = 0 ;
195- for (uint8_t byte : byte_array) {
196- int new_value = remainder * 256 + byte;
197- quotient += DigitToChar (new_value / 10 );
198- remainder = static_cast <uint8_t >(new_value % 10 );
198+ vector<digit_t > digits;
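199+ // Left-pad byte_array with zero bytes so that its size is a multiple of DIGIT_BYTES (the mask below
200+ // assumes DIGIT_BYTES is a power of two); the loop can then consume exactly DIGIT_BYTES bytes per
200+ // iteration without a bounds check inside it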
201+ idx_t padding_size = (-byte_array.size ()) & (DIGIT_BYTES - 1 );
202+ byte_array.insert (byte_array.begin (), padding_size, 0 );
203+ for (idx_t i = 0 ; i < byte_array.size (); i += DIGIT_BYTES) {
204+ digit_t hi = 0 ;
205+ for (idx_t j = 0 ; j < DIGIT_BYTES; j++) {
206+ hi |= UnsafeNumericCast<digit_t >(byte_array[i + j]) << (8 * (DIGIT_BYTES - j - 1 ));
199207 }
200- decimal_string += DigitToChar (remainder);
201- // Remove leading zeros from the quotient
202- byte_array.clear ();
203- for (char digit : quotient) {
204- if (digit != ' 0' || !byte_array.empty ()) {
205- byte_array.push_back (static_cast <uint8_t >(CharToDigit (digit)));
206- }
208+
209+ for (idx_t j = 0 ; j < digits.size (); j++) {
210+ twodigit_t tmp = UnsafeNumericCast<twodigit_t >(digits[j]) << DIGIT_BITS | hi;
211+ hi = static_cast <digit_t >(tmp / UnsafeNumericCast<twodigit_t >(DECIMAL_BASE));
212+ digits[j] = static_cast <digit_t >(tmp - UnsafeNumericCast<twodigit_t >(DECIMAL_BASE * hi));
213+ }
214+
215+ while (hi) {
216+ digits.push_back (hi % DECIMAL_BASE);
217+ hi /= DECIMAL_BASE;
218+ }
219+ }
220+
221+ if (digits.empty ()) {
222+ digits.push_back (0 );
223+ }
224+
225+ for (idx_t i = 0 ; i < digits.size () - 1 ; i++) {
226+ auto remain = digits[i];
227+ for (idx_t j = 0 ; j < DECIMAL_SHIFT; j++) {
228+ decimal_string += DigitToChar (static_cast <int >(remain % 10 ));
229+ remain /= 10 ;
207230 }
208231 }
232+
233+ auto remain = digits.back ();
234+ do {
235+ decimal_string += DigitToChar (static_cast <int >(remain % 10 ));
236+ remain /= 10 ;
237+ } while (remain != 0 );
238+
209239 if (is_negative) {
210240 decimal_string += ' -' ;
211241 }
0 commit comments