@@ -38,12 +38,18 @@ void DataFileStatsCollector::collectStats(
3838 VELOX_CHECK_NOT_NULL (fileMetadata);
3939
4040 std::unordered_set<int32_t > skipBoundsFields;
41+ std::unordered_map<int32_t , TypePtr> fieldIdToType;
4142 std::function<int32_t (IcebergDataFileStatsSettings*)> processFields =
4243 [&skipBoundsFields,
44+ &fieldIdToType,
4345 &processFields](IcebergDataFileStatsSettings* field) -> int32_t {
4446 if (field->skipBounds ) {
4547 skipBoundsFields.insert (field->fieldId );
4648 }
49+ // Store the Velox type for this field
50+ if (field->veloxType ) {
51+ fieldIdToType[field->fieldId ] = field->veloxType ;
52+ }
4753 if (field->children .empty ()) {
4854 return 1 ;
4955 }
@@ -112,13 +118,29 @@ void DataFileStatsCollector::collectStats(
112118 }
113119 }
114120
121+ // Only 4-byte DECIMAL values (precision <= 9) need byte-swapping.
115122 for (const auto & [fieldId, minStats] : globalMinStats) {
116- const auto lowerBound = minStats->MinValue ();
123+ auto lowerBound = minStats->MinValue ();
124+ auto upperBound = globalMaxStats[fieldId]->MaxValue ();
125+
126+ auto typeIt = fieldIdToType.find (fieldId);
127+ if (typeIt != fieldIdToType.end () && typeIt->second &&
128+ typeIt->second ->isDecimal () && lowerBound.size () == 4 ) {
129+ std::string lowerResult (4 , ' \0 ' );
130+ auto lowerSwapped = __builtin_bswap32 (
131+ *reinterpret_cast <const uint32_t *>(lowerBound.data ()));
132+ std::memcpy (lowerResult.data (), &lowerSwapped, 4 );
133+ lowerBound = lowerResult;
134+
135+ std::string upperResult (4 , ' \0 ' );
136+ auto upperSwapped = __builtin_bswap32 (
137+ *reinterpret_cast <const uint32_t *>(upperBound.data ()));
138+ std::memcpy (upperResult.data (), &upperSwapped, 4 );
139+ upperBound = upperResult;
140+ }
141+
117142 dataFileStats->lowerBounds [fieldId] =
118143 encoding::Base64::encode (lowerBound.data (), lowerBound.size ());
119- }
120- for (const auto & [fieldId, maxStats] : globalMaxStats) {
121- const auto upperBound = maxStats->MaxValue ();
122144 dataFileStats->upperBounds [fieldId] =
123145 encoding::Base64::encode (upperBound.data (), upperBound.size ());
124146 }
0 commit comments