Skip to content

Commit 5a879e9

Browse files
Antonov548 authored and krlmlr committed
vendor: Update vendored sources to duckdb/duckdb@920b39a
Read support for Parquet Float16 (duckdb/duckdb#16395) Fix issue duckdb/duckdb#16377 (duckdb/duckdb#16391)
1 parent e996fef commit 5a879e9

File tree

9 files changed

+94
-8
lines changed

9 files changed

+94
-8
lines changed

src/duckdb/extension/parquet/column_reader.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,15 @@
1414
#include "reader/null_column_reader.hpp"
1515
#include "parquet_reader.hpp"
1616
#include "parquet_timestamp.hpp"
17+
#include "parquet_float16.hpp"
18+
1719
#include "reader/row_number_column_reader.hpp"
1820
#include "snappy.h"
1921
#include "reader/string_column_reader.hpp"
2022
#include "reader/struct_column_reader.hpp"
2123
#include "reader/templated_column_reader.hpp"
2224
#include "reader/uuid_column_reader.hpp"
25+
2326
#include "zstd.h"
2427

2528
#include "duckdb/storage/table/column_segment.hpp"
@@ -766,6 +769,9 @@ unique_ptr<ColumnReader> ColumnReader::CreateReader(ParquetReader &reader, const
766769
case LogicalTypeId::BIGINT:
767770
return make_uniq<TemplatedColumnReader<int64_t, TemplatedParquetValueConversion<int64_t>>>(reader, schema);
768771
case LogicalTypeId::FLOAT:
772+
if (schema.type_info == ParquetExtraTypeInfo::FLOAT16) {
773+
return make_uniq<CallbackColumnReader<uint16_t, float, Float16ToFloat32>>(reader, schema);
774+
}
769775
return make_uniq<TemplatedColumnReader<float, TemplatedParquetValueConversion<float>>>(reader, schema);
770776
case LogicalTypeId::DOUBLE:
771777
if (schema.type_info == ParquetExtraTypeInfo::DECIMAL_BYTE_ARRAY) {

src/duckdb/extension/parquet/include/parquet_column_schema.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,8 @@ enum class ParquetExtraTypeInfo {
2323
UNIT_MICROS,
2424
DECIMAL_BYTE_ARRAY,
2525
DECIMAL_INT32,
26-
DECIMAL_INT64
26+
DECIMAL_INT64,
27+
FLOAT16
2728
};
2829

2930
struct ParquetColumnSchema {
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
//===----------------------------------------------------------------------===//
2+
// DuckDB
3+
//
4+
// parquet_float16.hpp
5+
//
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#pragma once
10+
11+
#include "duckdb.hpp"
12+
13+
namespace duckdb {
14+
15+
float Float16ToFloat32(const uint16_t &float16_value);
16+
17+
} // namespace duckdb
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
#include "parquet_float16.hpp"
2+
3+
#include <cstring>

#include "duckdb.hpp"
4+
#ifndef DUCKDB_AMALGAMATION
5+
6+
#endif
7+
8+
namespace duckdb {
9+
10+
// Converts an IEEE 754 binary16 (half precision) bit pattern into the float that has the same value.
//
// float16_value carries the raw 16 bits: 1 sign bit, 5 exponent bits (bias 15) and 10 fraction bits.
// The result is assembled as a binary32 bit pattern (1 sign bit, 8 exponent bits with bias 127,
// 23 fraction bits) and then copied into a float:
//   - zero keeps its sign,
//   - half subnormals are renormalized into ordinary float32 values,
//   - Inf stays Inf and NaN keeps its fraction bits (moved to the top of the float32 fraction),
//   - every other value only needs its exponent rebiased and its fraction widened.
float Float16ToFloat32(const uint16_t &float16_value) {
	static_assert(sizeof(float) == sizeof(uint32_t), "float is expected to be a 32-bit type");

	const uint32_t sign = float16_value >> 15;
	const uint32_t exponent = (float16_value >> 10) & 0x1F;
	uint32_t fraction = float16_value & 0x3FF;

	uint32_t float32_value;
	if (exponent == 0) {
		if (fraction == 0) {
			// (signed) zero
			float32_value = sign << 31;
		} else {
			// Subnormal half: value = 2^-14 * 0.fraction, which float32 can hold as an ordinary number.
			// Shift the fraction left until its leading one reaches the implicit-bit position (bit 10),
			// lowering the exponent by one per shift, e.g. 2^-14 * 0.0101 => 2^-16 * 1.0100.
			uint32_t float32_exponent = 127 - 14;
			while ((fraction & (1u << 10)) == 0) {
				float32_exponent--;
				fraction <<= 1;
			}
			// drop the now-implicit leading one
			fraction &= 0x3FF;
			float32_value = (sign << 31) | (float32_exponent << 23) | (fraction << 13);
		}
	} else if (exponent == 0x1F) {
		// Inf or NaN: all exponent bits set in float32 as well
		float32_value = (sign << 31) | (0xFFu << 23) | (fraction << 13);
	} else {
		// ordinary number: rebias the exponent from 15 to 127
		float32_value = (sign << 31) | ((exponent + (127 - 15)) << 23) | (fraction << 13);
	}

	// Copy the bits instead of dereferencing a casted pointer: reading a uint32_t object through a
	// float lvalue violates the strict aliasing rule and is undefined behaviour.
	float result;
	std::memcpy(&result, &float32_value, sizeof(result));
	return result;
}
44+
45+
} // namespace duckdb

src/duckdb/extension/parquet/parquet_metadata.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -424,6 +424,9 @@ Value ParquetLogicalTypeToString(const duckdb_parquet::LogicalType &type, bool i
424424
if (type.__isset.UUID) {
425425
return Value(PrintParquetElementToString(type.UUID));
426426
}
427+
if (type.__isset.FLOAT16) {
428+
return Value(PrintParquetElementToString(type.FLOAT16));
429+
}
427430
return Value();
428431
}
429432

src/duckdb/extension/parquet/parquet_reader.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,11 @@ LogicalType ParquetReader::DeriveLogicalType(const SchemaElement &s_ele, Parquet
133133
if (s_ele.type == Type::FIXED_LEN_BYTE_ARRAY) {
134134
return LogicalType::UUID;
135135
}
136+
} else if (s_ele.logicalType.__isset.FLOAT16) {
137+
if (s_ele.type == Type::FIXED_LEN_BYTE_ARRAY && s_ele.type_length == 2) {
138+
schema.type_info = ParquetExtraTypeInfo::FLOAT16;
139+
return LogicalType::FLOAT;
140+
}
136141
} else if (s_ele.logicalType.__isset.TIMESTAMP) {
137142
if (s_ele.logicalType.TIMESTAMP.unit.__isset.MILLIS) {
138143
schema.type_info = ParquetExtraTypeInfo::UNIT_MS;

src/duckdb/extension/parquet/parquet_statistics.cpp

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#include "duckdb.hpp"
44
#include "parquet_decimal_utils.hpp"
55
#include "parquet_timestamp.hpp"
6+
#include "parquet_float16.hpp"
67
#include "parquet_reader.hpp"
78
#include "reader/string_column_reader.hpp"
89
#include "reader/struct_column_reader.hpp"
@@ -93,10 +94,18 @@ Value ParquetStatisticsUtils::ConvertValueInternal(const LogicalType &type, cons
9394
}
9495
return Value::BIGINT(Load<int64_t>(stats_data));
9596
case LogicalTypeId::FLOAT: {
96-
if (stats.size() != sizeof(float)) {
97-
throw InvalidInputException("Incorrect stats size for type FLOAT");
97+
float val;
98+
if (schema_ele.type_info == ParquetExtraTypeInfo::FLOAT16) {
99+
if (stats.size() != sizeof(uint16_t)) {
100+
throw InvalidInputException("Incorrect stats size for type FLOAT16");
101+
}
102+
val = Float16ToFloat32(Load<uint16_t>(stats_data));
103+
} else {
104+
if (stats.size() != sizeof(float)) {
105+
throw InvalidInputException("Incorrect stats size for type FLOAT");
106+
}
107+
val = Load<float>(stats_data);
98108
}
99-
auto val = Load<float>(stats_data);
100109
if (!Value::FloatIsFinite(val)) {
101110
return Value();
102111
}

src/duckdb/src/function/table/version/pragma_version.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
#ifndef DUCKDB_PATCH_VERSION
2-
#define DUCKDB_PATCH_VERSION "1-dev750"
2+
#define DUCKDB_PATCH_VERSION "1-dev754"
33
#endif
44
#ifndef DUCKDB_MINOR_VERSION
55
#define DUCKDB_MINOR_VERSION 2
@@ -8,10 +8,10 @@
88
#define DUCKDB_MAJOR_VERSION 1
99
#endif
1010
#ifndef DUCKDB_VERSION
11-
#define DUCKDB_VERSION "v1.2.1-dev750"
11+
#define DUCKDB_VERSION "v1.2.1-dev754"
1212
#endif
1313
#ifndef DUCKDB_SOURCE_ID
14-
#define DUCKDB_SOURCE_ID "c8cc956524"
14+
#define DUCKDB_SOURCE_ID "920b39ad96"
1515
#endif
1616
#include "duckdb/function/table/system_functions.hpp"
1717
#include "duckdb/main/database.hpp"

src/include/sources.mk

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)