Skip to content

Commit d90e175

Browse files
committed
GH-46214: [C++] Improve S3 client initialization
1 parent 66733a1 commit d90e175

File tree

2 files changed

+48
-28
lines changed

2 files changed

+48
-28
lines changed

cpp/src/arrow/filesystem/s3fs.cc

Lines changed: 42 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1108,91 +1108,103 @@ class EndpointProviderCache {
11081108

11091109
class ClientBuilder {
11101110
public:
1111-
explicit ClientBuilder(S3Options options) : options_(std::move(options)) {}
1111+
explicit ClientBuilder(S3Options options) : options_(std::move(options)) {
1112+
// The ClientConfiguration constructor always does a current region lookup
1113+
// via EC2 metadata, unless IMDS is disabled (GH-46214).
1114+
client_config_.emplace(/*useSmartDefaults=*/true, options_.smart_defaults,
1115+
/*shouldDisableIMDS=*/true);
1116+
}
11121117

1113-
const Aws::Client::ClientConfiguration& config() const { return client_config_; }
1118+
const Aws::Client::ClientConfiguration& config() const {
1119+
DCHECK(client_config_.has_value());
1120+
return *client_config_;
1121+
}
11141122

1115-
Aws::Client::ClientConfiguration* mutable_config() { return &client_config_; }
1123+
Aws::Client::ClientConfiguration* mutable_config() {
1124+
DCHECK(client_config_.has_value());
1125+
return &*client_config_;
1126+
}
11161127

11171128
Result<std::shared_ptr<S3ClientHolder>> BuildClient(
11181129
std::optional<io::IOContext> io_context = std::nullopt) {
11191130
credentials_provider_ = options_.credentials_provider;
11201131
if (!options_.region.empty()) {
1121-
client_config_.region = ToAwsString(options_.region);
1132+
client_config_->region = ToAwsString(options_.region);
11221133
}
11231134
if (options_.request_timeout > 0) {
11241135
// Use ceil() to avoid setting it to 0 as that probably means no timeout.
1125-
client_config_.requestTimeoutMs =
1136+
client_config_->requestTimeoutMs =
11261137
static_cast<long>(ceil(options_.request_timeout * 1000)); // NOLINT runtime/int
11271138
}
11281139
if (options_.connect_timeout > 0) {
1129-
client_config_.connectTimeoutMs =
1140+
client_config_->connectTimeoutMs =
11301141
static_cast<long>(ceil(options_.connect_timeout * 1000)); // NOLINT runtime/int
11311142
}
11321143

1133-
client_config_.endpointOverride = ToAwsString(options_.endpoint_override);
1144+
client_config_->endpointOverride = ToAwsString(options_.endpoint_override);
11341145
if (options_.scheme == "http") {
1135-
client_config_.scheme = Aws::Http::Scheme::HTTP;
1146+
client_config_->scheme = Aws::Http::Scheme::HTTP;
11361147
} else if (options_.scheme == "https") {
1137-
client_config_.scheme = Aws::Http::Scheme::HTTPS;
1148+
client_config_->scheme = Aws::Http::Scheme::HTTPS;
11381149
} else {
11391150
return Status::Invalid("Invalid S3 connection scheme '", options_.scheme, "'");
11401151
}
11411152
if (options_.retry_strategy) {
1142-
client_config_.retryStrategy =
1153+
client_config_->retryStrategy =
11431154
std::make_shared<WrappedRetryStrategy>(options_.retry_strategy);
11441155
} else {
1145-
client_config_.retryStrategy = std::make_shared<ConnectRetryStrategy>();
1156+
client_config_->retryStrategy = std::make_shared<ConnectRetryStrategy>();
11461157
}
11471158
if (!options_.tls_ca_file_path.empty()) {
1148-
client_config_.caFile = ToAwsString(options_.tls_ca_file_path);
1159+
client_config_->caFile = ToAwsString(options_.tls_ca_file_path);
11491160
} else if (!internal::global_options.tls_ca_file_path.empty()) {
1150-
client_config_.caFile = ToAwsString(internal::global_options.tls_ca_file_path);
1161+
client_config_->caFile = ToAwsString(internal::global_options.tls_ca_file_path);
11511162
}
11521163
if (!options_.tls_ca_dir_path.empty()) {
1153-
client_config_.caPath = ToAwsString(options_.tls_ca_dir_path);
1164+
client_config_->caPath = ToAwsString(options_.tls_ca_dir_path);
11541165
} else if (!internal::global_options.tls_ca_dir_path.empty()) {
1155-
client_config_.caPath = ToAwsString(internal::global_options.tls_ca_dir_path);
1166+
client_config_->caPath = ToAwsString(internal::global_options.tls_ca_dir_path);
11561167
}
1157-
client_config_.verifySSL = options_.tls_verify_certificates;
1168+
client_config_->verifySSL = options_.tls_verify_certificates;
11581169

11591170
// Set proxy options if provided
11601171
if (!options_.proxy_options.scheme.empty()) {
11611172
if (options_.proxy_options.scheme == "http") {
1162-
client_config_.proxyScheme = Aws::Http::Scheme::HTTP;
1173+
client_config_->proxyScheme = Aws::Http::Scheme::HTTP;
11631174
} else if (options_.proxy_options.scheme == "https") {
1164-
client_config_.proxyScheme = Aws::Http::Scheme::HTTPS;
1175+
client_config_->proxyScheme = Aws::Http::Scheme::HTTPS;
11651176
} else {
11661177
return Status::Invalid("Invalid proxy connection scheme '",
11671178
options_.proxy_options.scheme, "'");
11681179
}
11691180
}
11701181
if (!options_.proxy_options.host.empty()) {
1171-
client_config_.proxyHost = ToAwsString(options_.proxy_options.host);
1182+
client_config_->proxyHost = ToAwsString(options_.proxy_options.host);
11721183
}
11731184
if (options_.proxy_options.port != -1) {
1174-
client_config_.proxyPort = options_.proxy_options.port;
1185+
client_config_->proxyPort = options_.proxy_options.port;
11751186
}
11761187
if (!options_.proxy_options.username.empty()) {
1177-
client_config_.proxyUserName = ToAwsString(options_.proxy_options.username);
1188+
client_config_->proxyUserName = ToAwsString(options_.proxy_options.username);
11781189
}
11791190
if (!options_.proxy_options.password.empty()) {
1180-
client_config_.proxyPassword = ToAwsString(options_.proxy_options.password);
1191+
client_config_->proxyPassword = ToAwsString(options_.proxy_options.password);
11811192
}
11821193

11831194
if (io_context) {
11841195
// TODO: Once ARROW-15035 is done we can get rid of the "at least 25" fallback
1185-
client_config_.maxConnections = std::max(io_context->executor()->GetCapacity(), 25);
1196+
client_config_->maxConnections =
1197+
std::max(io_context->executor()->GetCapacity(), 25);
11861198
}
11871199

11881200
const bool use_virtual_addressing =
11891201
options_.endpoint_override.empty() || options_.force_virtual_addressing;
11901202

11911203
#ifdef ARROW_S3_HAS_S3CLIENT_CONFIGURATION
1192-
client_config_.useVirtualAddressing = use_virtual_addressing;
1193-
auto endpoint_provider = EndpointProviderCache::Instance()->Lookup(client_config_);
1204+
client_config_->useVirtualAddressing = use_virtual_addressing;
1205+
auto endpoint_provider = EndpointProviderCache::Instance()->Lookup(*client_config_);
11941206
auto client = std::make_shared<S3Client>(credentials_provider_, endpoint_provider,
1195-
client_config_);
1207+
*client_config_);
11961208
#else
11971209
auto client = std::make_shared<S3Client>(
11981210
credentials_provider_, client_config_,
@@ -1207,10 +1219,12 @@ class ClientBuilder {
12071219

12081220
protected:
12091221
S3Options options_;
1222+
// GH-46214: the ClientConfiguration default constructor may issue arbitrary metadata
1223+
// calls, defer construction until necessary.
12101224
#ifdef ARROW_S3_HAS_S3CLIENT_CONFIGURATION
1211-
Aws::S3::S3ClientConfiguration client_config_;
1225+
std::optional<Aws::S3::S3ClientConfiguration> client_config_;
12121226
#else
1213-
Aws::Client::ClientConfiguration client_config_;
1227+
std::optional<Aws::Client::ClientConfiguration> client_config_;
12141228
#endif
12151229
std::shared_ptr<Aws::Auth::AWSCredentialsProvider> credentials_provider_;
12161230
};

cpp/src/arrow/filesystem/s3fs.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,12 @@ class ARROW_EXPORT S3RetryStrategy {
9696

9797
/// Options for the S3FileSystem implementation.
9898
struct ARROW_EXPORT S3Options {
99+
/// \brief Smart defaults for option values
100+
///
101+
/// The possible values for this setting are explained in the AWS docs:
102+
/// https://docs.aws.amazon.com/sdkref/latest/guide/feature-smart-config-defaults.html
103+
const char* smart_defaults = "standard";
104+
99105
/// \brief AWS region to connect to.
100106
///
101107
/// If unset, the AWS SDK will choose a default value. The exact algorithm

0 commit comments

Comments
 (0)