Skip to content

Commit 87ba0b5

Browse files
committed
Apply urlEncode to all partition values
1 parent 2086483 commit 87ba0b5

File tree

3 files changed

+22
-28
lines changed

3 files changed

+22
-28
lines changed

velox/connectors/hive/iceberg/IcebergPartitionIdGenerator.cpp

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,26 @@ std::pair<std::string, std::string> makePartitionKeyValueString(
4040
partitionVector->as<SimpleVector<T>>()->valueAt(row)));
4141
}
4242

43+
// Iceberg spec requires URL encoding in the partition path.
44+
// This function matches java.net.URLEncoder.encode(string, "UTF-8").
45+
std::string urlEncode(const StringView& data) {
46+
std::ostringstream ret;
47+
48+
for (unsigned char c : data) {
49+
// These characters are not encoded in Java's URLEncoder.
50+
if (std::isalnum(c) || c == '-' || c == '_' || c == '.' || c == '*') {
51+
ret << c;
52+
} else if (c == ' ') {
53+
ret << '+';
54+
} else {
55+
// All other characters are percent-encoded.
56+
ret << fmt::format("%{:02X}", c);
57+
}
58+
}
59+
60+
return ret.str();
61+
}
62+
4363
} // namespace
4464

4565
IcebergPartitionIdGenerator::IcebergPartitionIdGenerator(
@@ -178,7 +198,7 @@ std::string IcebergPartitionIdGenerator::partitionName(
178198
if (partitionPathAsLowerCase_) {
179199
folly::toLowerAscii(key);
180200
}
181-
ret << fmt::format("{}={}", urlEncode(key.data()), value);
201+
ret << fmt::format("{}={}", urlEncode(key.data()), urlEncode(value.data()));
182202
}
183203

184204
return ret.str();

velox/connectors/hive/iceberg/Transforms.cpp

Lines changed: 0 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -45,26 +45,6 @@ const TypePtr findChildTypeKind(
4545
return currentType;
4646
}
4747

48-
// Iceberg spec requires URL encoding in the partition path.
49-
// This function matches java.net.URLEncoder.encode(string, "UTF-8").
50-
std::string urlEncode(const StringView& data) {
51-
std::ostringstream ret;
52-
53-
for (unsigned char c : data) {
54-
// These characters are not encoded in Java's URLEncoder.
55-
if (std::isalnum(c) || c == '-' || c == '_' || c == '.' || c == '*') {
56-
ret << c;
57-
} else if (c == ' ') {
58-
ret << '+';
59-
} else {
60-
// All other characters are percent-encoded.
61-
ret << fmt::format("%{:02X}", c);
62-
}
63-
}
64-
65-
return ret.str();
66-
}
67-
6848
template <typename T>
6949
VectorPtr IdentityTransform<T>::apply(const VectorPtr& block) const {
7050
if constexpr (std::is_same_v<T, StringView>) {

velox/connectors/hive/iceberg/Transforms.h

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,6 @@ const TypePtr findChildTypeKind(
2929
const RowTypePtr& inputType,
3030
const std::string& fullName);
3131

32-
std::string urlEncode(const StringView& data);
33-
3432
class Transform {
3533
public:
3634
Transform(TypePtr type, TransformType transformType, memory::MemoryPool* pool)
@@ -63,10 +61,6 @@ class Transform {
6361
return decimalToHumanString(value);
6462
}
6563

66-
std::string toHumanString(const StringView& value) const {
67-
return urlEncode(value);
68-
}
69-
7064
// Renders a boolean partition value as the literal text "true" or "false".
std::string toHumanString(bool value) const {
  if (value) {
    return "true";
  }
  return "false";
}
@@ -80,7 +74,7 @@ class Transform {
8074
options.skipTrailingZeros = true;
8175
options.leadingPositiveSign = true;
8276
options.skipTrailingZeroSeconds = true;
83-
return urlEncode(value.toString(options).data());
77+
return value.toString(options);
8478
}
8579

8680
std::string name() const {

0 commit comments

Comments
 (0)