Skip to content

Commit 752810e

Browse files
committed
vendor: Update vendored sources to duckdb/duckdb@0e0b35a
Parquet Reader Cleanup: Move ColumnReaders to separate files (duckdb/duckdb#16092)
1 parent 9fe0548 commit 752810e

30 files changed

+982
-882
lines changed

src/duckdb/extension/parquet/column_reader.cpp

Lines changed: 13 additions & 848 deletions
Large diffs are not rendered by default.

src/duckdb/extension/parquet/geo_parquet.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
#include "duckdb/planner/expression/bound_function_expression.hpp"
99
#include "duckdb/planner/expression/bound_reference_expression.hpp"
1010
#include "duckdb/main/extension_helper.hpp"
11-
#include "expression_column_reader.hpp"
11+
#include "reader/expression_column_reader.hpp"
1212
#include "parquet_reader.hpp"
1313
#include "yyjson.hpp"
1414

src/duckdb/extension/parquet/include/parquet_decimal_utils.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
#pragma once
1010

1111
#include "column_reader.hpp"
12-
#include "templated_column_reader.hpp"
12+
#include "reader/templated_column_reader.hpp"
1313

1414
namespace duckdb {
1515

src/duckdb/extension/parquet/include/boolean_column_reader.hpp renamed to src/duckdb/extension/parquet/include/reader/boolean_column_reader.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
#pragma once
1010

1111
#include "column_reader.hpp"
12-
#include "templated_column_reader.hpp"
12+
#include "reader/templated_column_reader.hpp"
1313

1414
namespace duckdb {
1515

src/duckdb/extension/parquet/include/callback_column_reader.hpp renamed to src/duckdb/extension/parquet/include/reader/callback_column_reader.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
#pragma once
1010

1111
#include "column_reader.hpp"
12-
#include "templated_column_reader.hpp"
12+
#include "reader/templated_column_reader.hpp"
1313
#include "parquet_reader.hpp"
1414

1515
namespace duckdb {

src/duckdb/extension/parquet/include/cast_column_reader.hpp renamed to src/duckdb/extension/parquet/include/reader/cast_column_reader.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
#pragma once
1010

1111
#include "column_reader.hpp"
12-
#include "templated_column_reader.hpp"
12+
#include "reader/templated_column_reader.hpp"
1313

1414
namespace duckdb {
1515

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
//===----------------------------------------------------------------------===//
2+
// DuckDB
3+
//
4+
// decimal_column_reader.hpp
5+
//
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#pragma once
10+
11+
#include "column_reader.hpp"
12+
#include "reader/templated_column_reader.hpp"
13+
#include "parquet_reader.hpp"
14+
#include "parquet_decimal_utils.hpp"
15+
16+
namespace duckdb {
17+
18+
template <class DUCKDB_PHYSICAL_TYPE, bool FIXED_LENGTH>
19+
struct DecimalParquetValueConversion {
20+
static DUCKDB_PHYSICAL_TYPE PlainRead(ByteBuffer &plain_data, ColumnReader &reader) {
21+
idx_t byte_len;
22+
if (FIXED_LENGTH) {
23+
byte_len = (idx_t)reader.Schema().type_length; /* sure, type length needs to be a signed int */
24+
} else {
25+
byte_len = plain_data.read<uint32_t>();
26+
}
27+
plain_data.available(byte_len);
28+
auto res = ParquetDecimalUtils::ReadDecimalValue<DUCKDB_PHYSICAL_TYPE>(const_data_ptr_cast(plain_data.ptr),
29+
byte_len, reader.Schema());
30+
31+
plain_data.inc(byte_len);
32+
return res;
33+
}
34+
35+
static void PlainSkip(ByteBuffer &plain_data, ColumnReader &reader) {
36+
uint32_t decimal_len = FIXED_LENGTH ? reader.Schema().type_length : plain_data.read<uint32_t>();
37+
plain_data.inc(decimal_len);
38+
}
39+
40+
static bool PlainAvailable(const ByteBuffer &plain_data, const idx_t count) {
41+
return true;
42+
}
43+
44+
static DUCKDB_PHYSICAL_TYPE UnsafePlainRead(ByteBuffer &plain_data, ColumnReader &reader) {
45+
return PlainRead(plain_data, reader);
46+
}
47+
48+
static void UnsafePlainSkip(ByteBuffer &plain_data, ColumnReader &reader) {
49+
PlainSkip(plain_data, reader);
50+
}
51+
};
52+
53+
template <class DUCKDB_PHYSICAL_TYPE, bool FIXED_LENGTH>
54+
class DecimalColumnReader
55+
: public TemplatedColumnReader<DUCKDB_PHYSICAL_TYPE,
56+
DecimalParquetValueConversion<DUCKDB_PHYSICAL_TYPE, FIXED_LENGTH>> {
57+
using BaseType =
58+
TemplatedColumnReader<DUCKDB_PHYSICAL_TYPE, DecimalParquetValueConversion<DUCKDB_PHYSICAL_TYPE, FIXED_LENGTH>>;
59+
60+
public:
61+
DecimalColumnReader(ParquetReader &reader, LogicalType type_p, const SchemaElement &schema_p, // NOLINT
62+
idx_t file_idx_p, idx_t max_define_p, idx_t max_repeat_p)
63+
: TemplatedColumnReader<DUCKDB_PHYSICAL_TYPE,
64+
DecimalParquetValueConversion<DUCKDB_PHYSICAL_TYPE, FIXED_LENGTH>>(
65+
reader, std::move(type_p), schema_p, file_idx_p, max_define_p, max_repeat_p) {
66+
}
67+
};
68+
69+
} // namespace duckdb

src/duckdb/extension/parquet/include/expression_column_reader.hpp renamed to src/duckdb/extension/parquet/include/reader/expression_column_reader.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
#pragma once
1010

1111
#include "column_reader.hpp"
12-
#include "templated_column_reader.hpp"
12+
#include "duckdb/execution/expression_executor.hpp"
1313

1414
namespace duckdb {
1515

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
//===----------------------------------------------------------------------===//
2+
// DuckDB
3+
//
4+
// interval_column_reader.hpp
5+
//
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#pragma once
10+
11+
#include "column_reader.hpp"
12+
#include "reader/templated_column_reader.hpp"
13+
#include "parquet_reader.hpp"
14+
15+
namespace duckdb {
16+
17+
//===--------------------------------------------------------------------===//
18+
// Interval Column Reader
19+
//===--------------------------------------------------------------------===//
20+
struct IntervalValueConversion {
21+
static constexpr const idx_t PARQUET_INTERVAL_SIZE = 12;
22+
23+
static interval_t ReadParquetInterval(const_data_ptr_t input) {
24+
interval_t result;
25+
result.months = Load<int32_t>(input);
26+
result.days = Load<int32_t>(input + sizeof(uint32_t));
27+
result.micros = int64_t(Load<uint32_t>(input + sizeof(uint32_t) * 2)) * 1000;
28+
return result;
29+
}
30+
31+
static interval_t PlainRead(ByteBuffer &plain_data, ColumnReader &reader) {
32+
plain_data.available(PARQUET_INTERVAL_SIZE);
33+
return UnsafePlainRead(plain_data, reader);
34+
}
35+
36+
static void PlainSkip(ByteBuffer &plain_data, ColumnReader &reader) {
37+
plain_data.inc(PARQUET_INTERVAL_SIZE);
38+
}
39+
40+
static bool PlainAvailable(const ByteBuffer &plain_data, const idx_t count) {
41+
return plain_data.check_available(count * PARQUET_INTERVAL_SIZE);
42+
}
43+
44+
static interval_t UnsafePlainRead(ByteBuffer &plain_data, ColumnReader &reader) {
45+
auto res = ReadParquetInterval(const_data_ptr_cast(plain_data.ptr));
46+
plain_data.unsafe_inc(PARQUET_INTERVAL_SIZE);
47+
return res;
48+
}
49+
50+
static void UnsafePlainSkip(ByteBuffer &plain_data, ColumnReader &reader) {
51+
plain_data.unsafe_inc(PARQUET_INTERVAL_SIZE);
52+
}
53+
};
54+
55+
// Column reader for interval columns: a TemplatedColumnReader producing interval_t
// values through IntervalValueConversion.
class IntervalColumnReader : public TemplatedColumnReader<interval_t, IntervalValueConversion> {
public:
	// Forwards every argument unchanged to the templated base reader.
	IntervalColumnReader(ParquetReader &reader, LogicalType type_p, const SchemaElement &schema_p, idx_t file_idx_p,
	                     idx_t max_define_p, idx_t max_repeat_p)
	    : TemplatedColumnReader<interval_t, IntervalValueConversion>(reader, std::move(type_p), schema_p,
	                                                                 file_idx_p, max_define_p, max_repeat_p) {
	}
};
63+
64+
} // namespace duckdb

src/duckdb/extension/parquet/include/list_column_reader.hpp renamed to src/duckdb/extension/parquet/include/reader/list_column_reader.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
#pragma once
1010

1111
#include "column_reader.hpp"
12-
#include "templated_column_reader.hpp"
12+
#include "reader/templated_column_reader.hpp"
1313

1414
namespace duckdb {
1515

0 commit comments

Comments
 (0)