Skip to content

Commit 9995ad4

Browse files
committed
Modify spark image to include azure and gcs dependency jars
1 parent d37f605 commit 9995ad4

File tree

1 file changed

+6
-0
lines changed

1 file changed

+6
-0
lines changed

testing/spark3-iceberg/Dockerfile

+6
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,12 @@ WORKDIR ${SPARK_HOME}/jars
3535
# install AWS SDK so we can access S3; the version must match the hadoop-* jars which are part of SPARK distribution
3636
RUN wget -nv "https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-aws/3.3.4/hadoop-aws-3.3.4.jar"
3737
RUN wget -nv "https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-bundle/1.12.319/aws-java-sdk-bundle-1.12.319.jar"
38+
# install the Hadoop Azure connectors so we can access Azure storage; the hadoop-* jar versions must match the hadoop-* jars which are part of the SPARK distribution
39+
RUN wget -nv "https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-azure/3.3.4/hadoop-azure-3.3.4.jar"
40+
# hadoop-azure-datalake is kept on the same 3.3.4 release as the hadoop-aws and hadoop-azure jars fetched in this section
RUN wget -nv "https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-azure-datalake/3.3.4/hadoop-azure-datalake-3.3.4.jar"
41+
RUN wget -nv "https://repo1.maven.org/maven2/com/microsoft/azure/azure-storage/8.6.6/azure-storage-8.6.6.jar"
42+
# install the Google Cloud Storage connector for Hadoop so we can access GCS; the connector build must target the Hadoop major version which is part of the SPARK distribution
43+
# pinned, shaded Hadoop 3 build of the GCS connector: a floating "-latest" jar makes the image non-reproducible,
# and the hadoop2 flavour does not match the Hadoop 3.3.x jars used in this image
RUN wget -nv "https://repo1.maven.org/maven2/com/google/cloud/bigdataoss/gcs-connector/hadoop3-2.2.11/gcs-connector-hadoop3-2.2.11-shaded.jar"
3844

3945
# install Iceberg
4046
RUN wget -nv "https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-spark-runtime-${ICEBERG_JAR_VERSION}/${ICEBERG_VERSION}/iceberg-spark-runtime-${ICEBERG_JAR_VERSION}-${ICEBERG_VERSION}.jar"

0 commit comments

Comments
 (0)