Skip to content

Commit 13e1639

Browse files
committed
vendor: Update vendored sources to duckdb/duckdb@d769821
Parquet Reader: for DeltaLengthByteArray encoding, directly refer to strings from the block without copying (duckdb/duckdb#16186)
1 parent f34ed60 commit 13e1639

File tree

6 files changed: 25 additions (+25) and 18 deletions (-18).

src/duckdb/extension/parquet/column_reader.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -549,7 +549,7 @@ void ColumnReader::ReadData(idx_t read_now, data_ptr_t define_out, data_ptr_t re
549549
rle_decoder.Read(define_ptr, read_now, result, result_offset);
550550
break;
551551
case ColumnEncoding::DELTA_LENGTH_BYTE_ARRAY:
552-
delta_length_byte_array_decoder.Read(define_ptr, read_now, result, result_offset);
552+
delta_length_byte_array_decoder.Read(block, define_ptr, read_now, result, result_offset);
553553
break;
554554
case ColumnEncoding::DELTA_BYTE_ARRAY:
555555
delta_byte_array_decoder.Read(define_ptr, read_now, result, result_offset);

src/duckdb/extension/parquet/decoder/delta_length_byte_array_decoder.cpp

Lines changed: 11 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -2,12 +2,12 @@
22
#include "decoder/delta_byte_array_decoder.hpp"
33
#include "column_reader.hpp"
44
#include "parquet_reader.hpp"
5-
#include "reader/templated_column_reader.hpp"
5+
#include "reader/string_column_reader.hpp"
66

77
namespace duckdb {
88

99
DeltaLengthByteArrayDecoder::DeltaLengthByteArrayDecoder(ColumnReader &reader)
10-
: reader(reader), length_buffer(reader.encoding_buffers[0]) {
10+
: reader(reader), length_buffer(reader.encoding_buffers[0]), length_idx(0) {
1111
}
1212

1313
void DeltaLengthByteArrayDecoder::InitializePage() {
@@ -21,8 +21,9 @@ void DeltaLengthByteArrayDecoder::InitializePage() {
2121
length_idx = 0;
2222
}
2323

24-
void DeltaLengthByteArrayDecoder::Read(uint8_t *defines, idx_t read_count, Vector &result, idx_t result_offset) {
25-
auto &block = *reader.block;
24+
void DeltaLengthByteArrayDecoder::Read(shared_ptr<ResizeableBuffer> &block_ref, uint8_t *defines, idx_t read_count,
25+
Vector &result, idx_t result_offset) {
26+
auto &block = *block_ref;
2627
auto length_data = reinterpret_cast<uint32_t *>(length_buffer.ptr);
2728
auto result_data = FlatVector::GetData<string_t>(result);
2829
auto &result_mask = FlatVector::Validity(result);
@@ -40,17 +41,16 @@ void DeltaLengthByteArrayDecoder::Read(uint8_t *defines, idx_t read_count, Vecto
4041
}
4142
auto str_len = length_data[length_idx++];
4243
block.available(str_len);
43-
result_data[result_idx] = StringVector::EmptyString(result, str_len);
44-
auto str_data = result_data[result_idx].GetDataWriteable();
45-
memcpy(str_data, block.ptr, str_len);
46-
block.inc(str_len);
47-
result_data[result_idx].Finalize();
44+
result_data[result_idx] = string_t(char_ptr_cast(block.ptr), str_len);
45+
block.unsafe_inc(str_len);
4846
}
47+
StringColumnReader::ReferenceBlock(result, block_ref);
4948
}
5049

5150
void DeltaLengthByteArrayDecoder::Skip(uint8_t *defines, idx_t skip_count) {
5251
auto &block = *reader.block;
5352
auto length_data = reinterpret_cast<uint32_t *>(length_buffer.ptr);
53+
idx_t skip_bytes = 0;
5454
for (idx_t row_idx = 0; row_idx < skip_count; row_idx++) {
5555
if (defines && defines[row_idx] != reader.MaxDefine()) {
5656
continue;
@@ -61,9 +61,9 @@ void DeltaLengthByteArrayDecoder::Skip(uint8_t *defines, idx_t skip_count) {
6161
"read of %d from %d entries) - corrupt file?",
6262
length_idx, byte_array_count);
6363
}
64-
auto str_len = length_data[length_idx++];
65-
block.inc(str_len);
64+
skip_bytes += length_data[length_idx++];
6665
}
66+
block.inc(skip_bytes);
6767
}
6868

6969
} // namespace duckdb

src/duckdb/extension/parquet/include/decoder/delta_length_byte_array_decoder.hpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,8 @@ class DeltaLengthByteArrayDecoder {
2222
public:
2323
void InitializePage();
2424

25-
void Read(uint8_t *defines, idx_t read_count, Vector &result, idx_t result_offset);
25+
void Read(shared_ptr<ResizeableBuffer> &block, uint8_t *defines, idx_t read_count, Vector &result,
26+
idx_t result_offset);
2627
void Skip(uint8_t *defines, idx_t skip_count);
2728

2829
private:

src/duckdb/extension/parquet/include/reader/string_column_reader.hpp

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,8 @@ class StringColumnReader : public ColumnReader {
2525
static void VerifyString(const char *str_data, uint32_t str_len, const bool isVarchar);
2626
void VerifyString(const char *str_data, uint32_t str_len);
2727

28+
static void ReferenceBlock(Vector &result, shared_ptr<ResizeableBuffer> &block);
29+
2830
protected:
2931
void Plain(ByteBuffer &plain_data, uint8_t *defines, idx_t num_values, idx_t result_offset,
3032
Vector &result) override {

src/duckdb/extension/parquet/reader/string_column_reader.cpp

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -45,9 +45,13 @@ class ParquetStringVectorBuffer : public VectorBuffer {
4545
shared_ptr<ResizeableBuffer> buffer;
4646
};
4747

48+
void StringColumnReader::ReferenceBlock(Vector &result, shared_ptr<ResizeableBuffer> &block) {
49+
StringVector::AddBuffer(result, make_buffer<ParquetStringVectorBuffer>(block));
50+
}
51+
4852
void StringColumnReader::Plain(shared_ptr<ResizeableBuffer> &plain_data, uint8_t *defines, idx_t num_values,
4953
idx_t result_offset, Vector &result) {
50-
StringVector::AddBuffer(result, make_buffer<ParquetStringVectorBuffer>(plain_data));
54+
ReferenceBlock(result, plain_data);
5155
PlainTemplated<string_t, StringParquetValueConversion>(*plain_data, defines, num_values, result_offset, result);
5256
}
5357

@@ -57,7 +61,7 @@ void StringColumnReader::PlainSkip(ByteBuffer &plain_data, uint8_t *defines, idx
5761

5862
void StringColumnReader::PlainSelect(shared_ptr<ResizeableBuffer> &plain_data, uint8_t *defines, idx_t num_values,
5963
Vector &result, const SelectionVector &sel, idx_t count) {
60-
StringVector::AddBuffer(result, make_buffer<ParquetStringVectorBuffer>(plain_data));
64+
ReferenceBlock(result, plain_data);
6165
PlainSelectTemplated<string_t, StringParquetValueConversion>(*plain_data, defines, num_values, result, sel, count);
6266
}
6367

src/duckdb/src/function/table/version/pragma_version.cpp

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
#ifndef DUCKDB_PATCH_VERSION
2-
#define DUCKDB_PATCH_VERSION "1-dev203"
2+
#define DUCKDB_PATCH_VERSION "1-dev205"
33
#endif
44
#ifndef DUCKDB_MINOR_VERSION
55
#define DUCKDB_MINOR_VERSION 2
@@ -8,10 +8,10 @@
88
#define DUCKDB_MAJOR_VERSION 1
99
#endif
1010
#ifndef DUCKDB_VERSION
11-
#define DUCKDB_VERSION "v1.2.1-dev203"
11+
#define DUCKDB_VERSION "v1.2.1-dev205"
1212
#endif
1313
#ifndef DUCKDB_SOURCE_ID
14-
#define DUCKDB_SOURCE_ID "1584c228c8"
14+
#define DUCKDB_SOURCE_ID "d76982157f"
1515
#endif
1616
#include "duckdb/function/table/system_functions.hpp"
1717
#include "duckdb/main/database.hpp"

0 commit comments

Comments
 (0)