From a49ef0ea299be7d1314293f2de9f64ee5a06760e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Ma=C5=9Blanka?= Date: Tue, 25 Feb 2025 14:18:02 +0100 Subject: [PATCH] http/utils: changed url encoding to use upper case hex representation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to the S3 documentation related with signature calculation the canonical URI format must be encoded and use the upper case characters in hexadecimal characters representation. Document: https://docs.aws.amazon.com/AmazonS3/latest/API/sig-v4-header-based-auth.html This PR changes the canonical URI encode to use the upper case hexadecimal representation. The problem started to occur when a partition key was added as a part of Iceberg parquet file path. As the path may contain some characters that require encoding. Signed-off-by: Michał Maślanka (cherry picked from commit 48efe260a09c7daf7d3effc110ebd26b11d80eda) --- src/v/http/tests/request_builder_test.cc | 6 ++--- src/v/http/tests/uri_encoding_test.cc | 26 +++++++++---------- src/v/http/utils.cc | 14 +++++----- .../rest_client/tests/catalog_client_tests.cc | 2 +- src/v/iceberg/tests/rest_catalog_test.cc | 2 +- 5 files changed, 25 insertions(+), 25 deletions(-) diff --git a/src/v/http/tests/request_builder_test.cc b/src/v/http/tests/request_builder_test.cc index aea33d2ac1d8c..85f078e239238 100644 --- a/src/v/http/tests/request_builder_test.cc +++ b/src/v/http/tests/request_builder_test.cc @@ -94,7 +94,7 @@ TEST(request_builder, test_url_percent_encoding) { ASSERT_TRUE(req.has_value()); EXPECT_EQ( req->target(), - "/?afgz0119%21%23%24%26%27%28%29%2a%2b%2c%2F%3a%3b%3d%3f%" - "40%5b%5dafgz0119=afgz0119%21%23%24%26%27%28%29%2a%2b%2c%2F%3a%3b%3d%" - "3f%40%5b%5dafgz0119"); + "/?afgz0119%21%23%24%26%27%28%29%2A%2B%2C%2F%3A%3B%3D%3F%" + "40%5B%5Dafgz0119=afgz0119%21%23%24%26%27%28%29%2A%2B%2C%2F%3A%3B%3D%" + "3F%40%5B%5Dafgz0119"); } diff --git a/src/v/http/tests/uri_encoding_test.cc b/src/v/http/tests/uri_encoding_test.cc index 343a525b0b3e1..da5975239fdf0 100644 --- a/src/v/http/tests/uri_encoding_test.cc +++ b/src/v/http/tests/uri_encoding_test.cc @@ -21,13 +21,13 @@ TEST(uri_encoding, encode_special_chars) { constexpr auto input = "afgz0119!#$&'()*+,/:;=?@[]afgz0119"; ASSERT_EQ( http::uri_encode(input, http::uri_encode_slash::yes), - "afgz0119%21%23%24%26%27%28%29%2a%2b%2c%2F%3a%3b%3d%3f%" - "40%5b%5dafgz0119"); + "afgz0119%21%23%24%26%27%28%29%2A%2B%2C%2F%3A%3B%3D%3F%" + "40%5B%5Dafgz0119"); ASSERT_EQ( http::uri_encode(input, http::uri_encode_slash::no), - "afgz0119%21%23%24%26%27%28%29%2a%2b%2c/" - "%3a%3b%3d%3f%40%5b%5dafgz0119"); + "afgz0119%21%23%24%26%27%28%29%2A%2B%2C/" + "%3A%3B%3D%3F%40%5B%5Dafgz0119"); } TEST(uri_encoding, form_encoded_data) { @@ -38,15 +38,15 @@ TEST(uri_encoding, form_encoded_data) { iobuf_parser p{http::form_encode_data(input)}; // Order of kv pairs is not guaranteed due to map iteration order + auto s = p.read_string(p.bytes_left()); + fmt::print(">>> {}\n", s); ASSERT_THAT( - p.read_string(p.bytes_left()), + s, AnyOf( - "foo=bar&afgz0119%21%23%24%26%27%28%29%2a%2b%2c%2F%3a%3b%3d%3f%40%" - "5b%" - "5dafgz0119=afgz0119%21%23%24%26%27%28%29%2a%2b%2c%2F%3a%3b%3d%3f%" - "40%" - "5b%5dafgz0119", - "afgz0119%21%23%24%26%27%28%29%2a%2b%2c%2F%3a%3b%3d%3f%" - "40%5b%5dafgz0119=afgz0119%21%23%24%26%27%28%29%2a%2b%2c%2F%3a%" - "3b%3d%3f%40%5b%5dafgz0119&foo=bar")); + "foo=bar&afgz0119%21%23%24%26%27%28%29%2A%2B%2C%2F%3A%3B%3D%3F%40%5B%" + "5Dafgz0119=afgz0119%21%23%24%26%27%28%29%2A%2B%2C%2F%3A%3B%3D%3F%40%" + "5B%5Dafgz0119", + "afgz0119%21%23%24%26%27%28%29%2A%2B%2C%2F%3A%3B%3D%3F%" + "40%5B%5Dafgz0119=afgz0119%21%23%24%26%27%28%29%2A%2B%2C%2F%3A%" + "3B%3D%3F%40%5B%5Dafgz0119&foo=bar")); } diff --git a/src/v/http/utils.cc b/src/v/http/utils.cc index a4fd40ad7f1d6..28eb9be033a55 100644 --- a/src/v/http/utils.cc +++ b/src/v/http/utils.cc @@ -11,16 +11,16 @@ #include "http/utils.h" -#include "bytes/bytes.h" - #include - namespace { - +/** + * Each URI encoded byte is formed by a '%' and the two-digit hexadecimal + * value of the byte. + * from: + * https://docs.aws.amazon.com/AmazonS3/latest/API/sig-v4-header-based-auth.html + */ inline void append_hex_utf8(ss::sstring& result, char ch) { - bytes b = {static_cast(ch)}; - result.append("%", 1); - auto h = to_hex(b); + auto h = fmt::format("%{:02X}", static_cast(ch)); result.append(h.data(), h.size()); } diff --git a/src/v/iceberg/rest_client/tests/catalog_client_tests.cc b/src/v/iceberg/rest_client/tests/catalog_client_tests.cc index 015fde58d9594..d65e60a530bac 100644 --- a/src/v/iceberg/rest_client/tests/catalog_client_tests.cc +++ b/src/v/iceberg/rest_client/tests/catalog_client_tests.cc @@ -117,7 +117,7 @@ ss::future validate_token_request( std::ranges::sort(received); ss::sstring expected{ - "grant_type=client_credentials&scope=PRINCIPAL_ROLE%3aALL&client_secret=" + "grant_type=client_credentials&scope=PRINCIPAL_ROLE%3AALL&client_secret=" "secret&client_id=id"}; std::ranges::sort(expected); diff --git a/src/v/iceberg/tests/rest_catalog_test.cc b/src/v/iceberg/tests/rest_catalog_test.cc index e2c72599fd35d..98e3fe10d5190 100644 --- a/src/v/iceberg/tests/rest_catalog_test.cc +++ b/src/v/iceberg/tests/rest_catalog_test.cc @@ -123,7 +123,7 @@ ss::future handle_token_request( EXPECT_TRUE(query_params_equal( absl::flat_hash_map{ {"grant_type", "client_credentials"}, - {"scope", "PRINCIPAL_ROLE%3aALL"}, + {"scope", "PRINCIPAL_ROLE%3AALL"}, {"client_secret", get_credentials().client_secret}, {"client_id", get_credentials().client_id}}, received));