Skip to content

Commit 7700fc9

Browse files
committed
Add support for AWS S3 through s3a:// as a filesystem to store Iceberg data and metadata.
1 parent d3930a2 commit 7700fc9

File tree

5 files changed

+164
-2
lines changed

5 files changed

+164
-2
lines changed

Dockerfile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ RUN \
2626
useradd iceberg --uid 1000 --gid 1000 --create-home
2727

2828
COPY --from=builder --chown=iceberg:iceberg /app/build/libs/iceberg-rest-image-all.jar /usr/lib/iceberg-rest/iceberg-rest-image-all.jar
29+
RUN mkdir -p /etc/hadoop/conf
30+
COPY static/core-site.xml /etc/hadoop/conf/.
2931

3032
ENV CATALOG_CATALOG__IMPL=org.apache.iceberg.jdbc.JdbcCatalog
3133
ENV CATALOG_URI=jdbc:sqlite:file:/tmp/iceberg_rest_mode=memory

build.gradle

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ dependencies {
2121

2222
implementation 'org.apache.hadoop:hadoop-common:3.3.4'
2323
implementation 'org.apache.hadoop:hadoop-hdfs-client:3.3.4'
24+
implementation 'org.apache.hadoop:hadoop-aws:3.3.4'
2425

2526
runtimeOnly "software.amazon.awssdk:url-connection-client:${awsSdkVersion}"
2627
runtimeOnly "software.amazon.awssdk:apache-client:${awsSdkVersion}"
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
package org.apache.iceberg.rest;
20+
21+
import java.io.File;
22+
23+
import org.apache.hadoop.conf.Configuration;
24+
import org.slf4j.Logger;
25+
import org.slf4j.LoggerFactory;
26+
27+
/**
28+
* Utility class to work with Apache Hadoop MapRed classes.
29+
*/
30+
public final class HadoopUtils {
31+
private static final Logger LOG = LoggerFactory.getLogger(RESTCatalogServer.class);
32+
33+
/**
34+
* Returns a new Hadoop Configuration object using the path to the hadoop configuration
35+
* This method is public because its being used in the RESTCatalogServer.
36+
*/
37+
public static org.apache.hadoop.conf.Configuration getCoreSiteConfiguration() {
38+
Configuration retConf = new org.apache.hadoop.conf.Configuration();
39+
40+
// We need to load both core-site.xml to determine the default fs path
41+
// Approach environment variables
42+
if (LOG.isDebugEnabled()) {
43+
LOG.debug(
44+
"Building possible paths to core-site.xml for hadoop configuration");
45+
}
46+
String[] possibleHadoopConfPaths = new String[3];
47+
possibleHadoopConfPaths[0] = System.getenv("HADOOP_CONF_DIR");
48+
49+
if (System.getenv("HADOOP_HOME") != null) {
50+
possibleHadoopConfPaths[1] = System.getenv("HADOOP_HOME") + "/conf";
51+
possibleHadoopConfPaths[2] = System.getenv("HADOOP_HOME") + "/etc/hadoop"; // hadoop 2.2
52+
}
53+
54+
for (String possibleHadoopConfPath : possibleHadoopConfPaths) {
55+
if (possibleHadoopConfPath != null) {
56+
if (LOG.isDebugEnabled()) {
57+
LOG.debug(
58+
"Found possibleHadoopConfPath entry: " + possibleHadoopConfPath);
59+
}
60+
if (new File(possibleHadoopConfPath).exists()) {
61+
if (LOG.isDebugEnabled()) {
62+
LOG.debug(
63+
"possibleHadoopConfPath entry (" + possibleHadoopConfPath + ") exists.");
64+
}
65+
if (new File(possibleHadoopConfPath + "/core-site.xml").exists()) {
66+
if (LOG.isDebugEnabled()) {
67+
LOG.debug(
68+
"Core Site config (" + possibleHadoopConfPath + "/core-site.xml) exists.");
69+
}
70+
retConf.addResource(
71+
new org.apache.hadoop.fs.Path(possibleHadoopConfPath + "/core-site.xml"));
72+
73+
if (LOG.isDebugEnabled()) {
74+
LOG.debug(
75+
"Adding " + possibleHadoopConfPath + "/core-site.xml to hadoop configuration");
76+
}
77+
}
78+
}
79+
}
80+
}
81+
return retConf;
82+
}
83+
84+
/**
85+
* Private constructor to prevent instantiation.
86+
*/
87+
private HadoopUtils() {
88+
throw new RuntimeException();
89+
}
90+
}

src/main/java/org/apache/iceberg/rest/RESTCatalogServer.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@
2424
import java.util.Locale;
2525
import java.util.Map;
2626
import java.util.stream.Collectors;
27-
import org.apache.hadoop.conf.Configuration;
2827
import org.apache.iceberg.CatalogProperties;
2928
import org.apache.iceberg.CatalogUtil;
3029
import org.apache.iceberg.catalog.Catalog;
@@ -80,7 +79,7 @@ private static Catalog backendCatalog() throws IOException {
8079
}
8180

8281
LOG.info("Creating catalog with properties: {}", catalogProperties);
83-
return CatalogUtil.buildIcebergCatalog("rest_backend", catalogProperties, new Configuration());
82+
return CatalogUtil.buildIcebergCatalog("rest_backend", catalogProperties, HadoopUtils.getCoreSiteConfiguration());
8483
}
8584

8685
public static void main(String[] args) throws Exception {

static/core-site.xml

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

      http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->

<configuration>
  <property>
    <name>fs.s3a.aws.credentials.provider</name>
    <value>
      org.apache.hadoop.fs.s3a.TemporaryAWSCredentialsProvider,
      org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider,
      com.amazonaws.auth.EnvironmentVariableCredentialsProvider,
      com.amazonaws.auth.InstanceProfileCredentialsProvider,
      com.amazonaws.auth.WebIdentityTokenCredentialsProvider
    </value>
    <description>
      Comma-separated class names of credential provider classes which implement
      com.amazonaws.auth.AWSCredentialsProvider.

      When S3A delegation tokens are not enabled, this list will be used
      to directly authenticate with S3 and DynamoDB services.
      When S3A Delegation tokens are enabled, depending upon the delegation
      token binding it may be used to communicate with the STS endpoint to
      request session/role credentials.

      These are loaded and queried in sequence for a valid set of credentials.
      Each listed class must implement one of the following means of
      construction, which are attempted in order:
      * a public constructor accepting java.net.URI and
        org.apache.hadoop.conf.Configuration,
      * a public constructor accepting org.apache.hadoop.conf.Configuration,
      * a public static method named getInstance that accepts no
        arguments and returns an instance of
        com.amazonaws.auth.AWSCredentialsProvider, or
      * a public default constructor.

      (DISABLED) Specifying org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider
      allows anonymous access to a publicly accessible S3 bucket without any
      credentials. Please note that allowing anonymous access to an S3 bucket
      compromises security and therefore is unsuitable for most use cases. It can
      be useful for accessing public data sets without requiring AWS credentials.

      If unspecified, then the default list of credential provider classes,
      queried in sequence, is:
      * org.apache.hadoop.fs.s3a.TemporaryAWSCredentialsProvider: looks
        for session login secrets in the Hadoop configuration.
      * org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider:
        Uses the values of fs.s3a.access.key and fs.s3a.secret.key.
      * com.amazonaws.auth.EnvironmentVariableCredentialsProvider: supports
        configuration of AWS access key ID and secret access key in
        environment variables named AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY,
        and AWS_SESSION_TOKEN as documented in the AWS SDK.
      * com.amazonaws.auth.InstanceProfileCredentialsProvider: picks up
        IAM credentials of any EC2 VM or AWS container in which the process is running.
    </description>
  </property>
</configuration>

0 commit comments

Comments
 (0)