Skip to content

Commit bc76eb1

Browse files
authored
HADOOP-19855. Replace native zstd C bindings with zstd-jni library (#8399)
Reviewed-by: Akira Ajisaka <aajisaka@apache.org> Reviewed-by: Shilun Fan <slfan1989@apache.org> Signed-off-by: Cheng Pan <chengpan@apache.org>
1 parent 21eb8ee commit bc76eb1

28 files changed

Lines changed: 161 additions & 880 deletions

File tree

BUILDING.txt

Lines changed: 0 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -107,8 +107,6 @@ Optional packages:
107107
$ sudo apt-get install bzip2 libbz2-dev
108108
* Linux FUSE
109109
$ sudo apt-get install fuse libfuse-dev
110-
* ZStandard compression
111-
$ sudo apt-get install libzstd1-dev
112110
* PMDK library for storage class memory(SCM) as HDFS cache backend
113111
Please refer to http://pmem.io/ and https://github.com/pmem/pmdk
114112

@@ -196,29 +194,6 @@ Maven build goals:
196194
and it ignores the -Dsnappy.prefix option. If -Dsnappy.lib isn't given, the
197195
bundling and building will fail.
198196

199-
200-
ZStandard build options:
201-
202-
ZStandard is a compression library that can be utilized by the native code.
203-
It is currently an optional component, meaning that Hadoop can be built with
204-
or without this dependency.
205-
206-
* Use -Drequire.zstd to fail the build if libzstd.so is not found.
207-
If this option is not specified and the zstd library is missing.
208-
209-
* Use -Dzstd.prefix to specify a nonstandard location for the libzstd
210-
header files and library files. You do not need this option if you have
211-
installed zstandard using a package manager.
212-
213-
* Use -Dzstd.lib to specify a nonstandard location for the libzstd library
214-
files. Similarly to zstd.prefix, you do not need this option if you have
215-
installed using a package manager.
216-
217-
* Use -Dbundle.zstd to copy the contents of the zstd.lib directory into
218-
the final tar file. This option requires that -Dzstd.lib is also given,
219-
and it ignores the -Dzstd.prefix option. If -Dzstd.lib isn't given, the
220-
bundling and building will fail.
221-
222197
OpenSSL build options:
223198

224199
OpenSSL includes a crypto library that can be utilized by the native code.
@@ -556,10 +531,6 @@ Building on Rocky Linux 8
556531
* Install optional dependencies (snappy-devel).
557532
$ sudo dnf --enablerepo=PowerTools install snappy-devel
558533

559-
* Install optional dependencies (libzstd-devel).
560-
$ sudo dnf install https://dl.fedoraproject.org/pub/epel/epel-release-latest-8.noarch.rpm
561-
$ sudo dnf --enablerepo=epel install libzstd-devel
562-
563534
* Install optional dependencies (isa-l).
564535
$ sudo dnf --enablerepo=PowerTools install nasm
565536
$ git clone https://github.com/intel/isa-l

dev-support/docker/Dockerfile_rockylinux_8

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,6 @@ RUN pkg-resolver/install-maven.sh rockylinux:8
117117
RUN pkg-resolver/install-boost.sh rockylinux:8
118118
RUN pkg-resolver/install-spotbugs.sh rockylinux:8
119119
RUN pkg-resolver/install-protobuf.sh rockylinux:8
120-
RUN pkg-resolver/install-zstandard.sh rockylinux:8
121120
RUN pkg-resolver/install-intel-isa-l.sh rockylinux:8
122121
RUN pkg-resolver/install-common-pkgs.sh
123122

dev-support/docker/pkg-resolver/install-zstandard.sh

Lines changed: 0 additions & 53 deletions
This file was deleted.

dev-support/docker/pkg-resolver/packages.json

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -222,19 +222,15 @@
222222
},
223223
"zlib": {
224224
"debian:12": [
225-
"libzstd-dev",
226225
"zlib1g-dev"
227226
],
228227
"debian:13": [
229-
"libzstd-dev",
230228
"zlib1g-dev"
231229
],
232230
"ubuntu:noble": [
233-
"libzstd-dev",
234231
"zlib1g-dev"
235232
],
236233
"ubuntu:noble::arch64": [
237-
"libzstd-dev",
238234
"zlib1g-dev"
239235
],
240236
"rockylinux:8": [

hadoop-client-modules/hadoop-client-api/pom.xml

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -67,13 +67,17 @@
6767
</exclusion>
6868
</exclusions>
6969
</dependency>
70-
<!-- snappy-java is native library and cannot be relocated. So we explicitly exclude it
71-
from shaded jar to prevent possible conflict. Make it as transitive dependency to
72-
make the downstream pull it. -->
70+
<!-- snappy-java/zstd-jni contains native library and cannot be relocated.
71+
So we explicitly exclude it from shaded jar to prevent possible conflict.
72+
Declare it as a transitive dependency so that downstream projects pull it in. -->
7373
<dependency>
7474
<groupId>org.xerial.snappy</groupId>
7575
<artifactId>snappy-java</artifactId>
7676
</dependency>
77+
<dependency>
78+
<groupId>com.github.luben</groupId>
79+
<artifactId>zstd-jni</artifactId>
80+
</dependency>
7781
</dependencies>
7882
<profiles>
7983
<profile>
@@ -106,8 +110,9 @@
106110
<include>org.apache.hadoop:*</include>
107111
</includes>
108112
<excludes>
109-
<!-- Leave snappy that includes native methods which cannot be relocated. -->
113+
<!-- Leave snappy/zstd-jni that includes native methods which cannot be relocated. -->
110114
<exclude>org.xerial.snappy:*</exclude>
115+
<exclude>com.github.luben:zstd-jni</exclude>
111116
</excludes>
112117
</artifactSet>
113118
<filters>
@@ -173,6 +178,9 @@
173178
<exclude>com/sun/management/**/*</exclude>
174179
<exclude>com/ibm/security/*</exclude>
175180
<exclude>com/ibm/security/**/*</exclude>
181+
<!-- Exclude zstd-jni -->
182+
<exclude>com/github/luben/zstd/*</exclude>
183+
<exclude>com/github/luben/zstd/**/*</exclude>
176184
</excludes>
177185
</relocation>
178186
<relocation>

hadoop-client-modules/hadoop-client-check-invariants/pom.xml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,8 +92,9 @@
9292
<exclude>org.glassfish.jersey:*</exclude>
9393
<!-- Leave bouncycastle unshaded because it's signed with a special Oracle certificate so it can be a custom JCE security provider -->
9494
<exclude>org.bouncycastle:*</exclude>
95-
<!-- Leave snappy that includes native methods which cannot be relocated. -->
95+
<!-- Leave snappy/zstd-jni that includes native methods which cannot be relocated. -->
9696
<exclude>org.xerial.snappy:*</exclude>
97+
<exclude>com.github.luben:zstd-jni</exclude>
9798
</excludes>
9899
</banTransitiveDependencies>
99100
<banDuplicateClasses>

hadoop-client-modules/hadoop-client-check-test-invariants/pom.xml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,8 +98,9 @@
9898
<exclude>com.google.code.findbugs:jsr305</exclude>
9999
<!-- Leave bouncycastle unshaded because it's signed with a special Oracle certificate so it can be a custom JCE security provider -->
100100
<exclude>org.bouncycastle:*</exclude>
101-
<!-- Leave snappy that includes native methods which cannot be relocated. -->
101+
<!-- Leave snappy/zstd-jni that includes native methods which cannot be relocated. -->
102102
<exclude>org.xerial.snappy:*</exclude>
103+
<exclude>com.github.luben:zstd-jni</exclude>
103104
<exclude>org.ehcache:*</exclude>
104105
<exclude>org.glassfish.jersey:*</exclude>
105106
</excludes>

hadoop-client-modules/hadoop-client-minicluster/pom.xml

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,11 @@
4646
<artifactId>snappy-java</artifactId>
4747
<scope>runtime</scope>
4848
</dependency>
49+
<dependency>
50+
<groupId>com.github.luben</groupId>
51+
<artifactId>zstd-jni</artifactId>
52+
<scope>runtime</scope>
53+
</dependency>
4954
<dependency>
5055
<groupId>org.apache.hadoop</groupId>
5156
<artifactId>hadoop-client-runtime</artifactId>
@@ -707,8 +712,9 @@
707712
<!-- We need a filter that matches just those things that are included in the above artifacts -->
708713
<!-- Leave bouncycastle unshaded because it's signed with a special Oracle certificate so it can be a custom JCE security provider -->
709714
<exclude>org.bouncycastle:*</exclude>
710-
<!-- Leave snappy that includes native methods which cannot be relocated. -->
715+
<!-- Leave snappy/zstd-jni that includes native methods which cannot be relocated. -->
711716
<exclude>org.xerial.snappy:*</exclude>
717+
<exclude>com.github.luben:zstd-jni</exclude>
712718
<exclude>org.glassfish.jersey:*</exclude>
713719
</excludes>
714720
</artifactSet>
@@ -947,6 +953,9 @@
947953
<exclude>com/sun/management/**/*</exclude>
948954
<exclude>com/ibm/security/*</exclude>
949955
<exclude>com/ibm/security/**/*</exclude>
956+
<!-- Exclude zstd-jni -->
957+
<exclude>com/github/luben/zstd/*</exclude>
958+
<exclude>com/github/luben/zstd/**/*</exclude>
950959
</excludes>
951960
</relocation>
952961
<relocation>

hadoop-client-modules/hadoop-client-runtime/pom.xml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,11 @@
6666
<artifactId>snappy-java</artifactId>
6767
<scope>runtime</scope>
6868
</dependency>
69+
<dependency>
70+
<groupId>com.github.luben</groupId>
71+
<artifactId>zstd-jni</artifactId>
72+
<scope>runtime</scope>
73+
</dependency>
6974
<!-- This comes from our parent pom. If we don't expressly change it here to get included,
7075
downstream will get warnings at compile time. -->
7176
<dependency>
@@ -170,6 +175,7 @@
170175
<exclude>org.bouncycastle:*</exclude>
171176
<!-- Leave snappy/zstd-jni that includes native methods which cannot be relocated. -->
172177
<exclude>org.xerial.snappy:*</exclude>
178+
<exclude>com.github.luben:zstd-jni</exclude>
173179
<!-- leave out kotlin classes -->
174180
<exclude>org.jetbrains.kotlin:*</exclude>
175181
<exclude>org.glassfish.jersey.test-framework:*</exclude>
@@ -320,6 +326,9 @@
320326
<exclude>com/sun/management/**/*</exclude>
321327
<exclude>com/ibm/security/*</exclude>
322328
<exclude>com/ibm/security/**/*</exclude>
329+
<!-- Exclude zstd-jni -->
330+
<exclude>com/github/luben/zstd/*</exclude>
331+
<exclude>com/github/luben/zstd/**/*</exclude>
323332
</excludes>
324333
</relocation>
325334
<relocation>

hadoop-common-project/hadoop-common/pom.xml

Lines changed: 5 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -370,6 +370,11 @@
370370
<artifactId>lz4-java</artifactId>
371371
<scope>provided</scope>
372372
</dependency>
373+
<dependency>
374+
<groupId>com.github.luben</groupId>
375+
<artifactId>zstd-jni</artifactId>
376+
<scope>compile</scope>
377+
</dependency>
373378
<dependency>
374379
<groupId>org.junit.jupiter</groupId>
375380
<artifactId>junit-jupiter-api</artifactId>
@@ -696,10 +701,6 @@
696701
</activation>
697702
<properties>
698703
<require.bzip2>false</require.bzip2>
699-
<zstd.prefix></zstd.prefix>
700-
<zstd.lib></zstd.lib>
701-
<zstd.include></zstd.include>
702-
<require.zstd>false</require.zstd>
703704
<openssl.prefix></openssl.prefix>
704705
<openssl.lib></openssl.lib>
705706
<openssl.include></openssl.include>
@@ -749,10 +750,6 @@
749750
<GENERATED_JAVAH>${project.build.directory}/native/javah</GENERATED_JAVAH>
750751
<JVM_ARCH_DATA_MODEL>${sun.arch.data.model}</JVM_ARCH_DATA_MODEL>
751752
<REQUIRE_BZIP2>${require.bzip2}</REQUIRE_BZIP2>
752-
<REQUIRE_ZSTD>${require.zstd}</REQUIRE_ZSTD>
753-
<CUSTOM_ZSTD_PREFIX>${zstd.prefix}</CUSTOM_ZSTD_PREFIX>
754-
<CUSTOM_ZSTD_LIB>${zstd.lib} </CUSTOM_ZSTD_LIB>
755-
<CUSTOM_ZSTD_INCLUDE>${zstd.include} </CUSTOM_ZSTD_INCLUDE>
756753
<REQUIRE_ISAL>${require.isal} </REQUIRE_ISAL>
757754
<CUSTOM_ISAL_PREFIX>${isal.prefix} </CUSTOM_ISAL_PREFIX>
758755
<CUSTOM_ISAL_LIB>${isal.lib} </CUSTOM_ISAL_LIB>
@@ -807,11 +804,6 @@
807804
<require.isal>false</require.isal>
808805
<isal.prefix></isal.prefix>
809806
<isal.lib></isal.lib>
810-
<zstd.prefix></zstd.prefix>
811-
<zstd.lib></zstd.lib>
812-
<zstd.include></zstd.include>
813-
<require.zstd>false</require.zstd>
814-
<bundle.zstd.in.bin>true</bundle.zstd.in.bin>
815807
<openssl.prefix></openssl.prefix>
816808
<openssl.lib></openssl.lib>
817809
<openssl.include></openssl.include>
@@ -961,10 +953,6 @@
961953
<argument>/nologo</argument>
962954
<argument>/p:Configuration=Release</argument>
963955
<argument>/p:OutDir=${project.build.directory}/bin/</argument>
964-
<argument>/p:CustomZstdPrefix=${zstd.prefix}</argument>
965-
<argument>/p:CustomZstdLib=${zstd.lib}</argument>
966-
<argument>/p:CustomZstdInclude=${zstd.include}</argument>
967-
<argument>/p:RequireZstd=${require.zstd}</argument>
968956
<argument>/p:CustomOpensslPrefix=${openssl.prefix}</argument>
969957
<argument>/p:CustomOpensslLib=${openssl.lib}</argument>
970958
<argument>/p:CustomOpensslInclude=${openssl.include}</argument>
@@ -989,10 +977,6 @@
989977
<argument>/nologo</argument>
990978
<argument>/p:Configuration=Release</argument>
991979
<argument>/p:OutDir=${project.build.directory}/bin/</argument>
992-
<argument>/p:CustomZstdPrefix=${zstd.prefix}</argument>
993-
<argument>/p:CustomZstdLib=${zstd.lib}</argument>
994-
<argument>/p:CustomZstdInclude=${zstd.include}</argument>
995-
<argument>/p:RequireZstd=${require.zstd}</argument>
996980
<argument>/p:CustomOpensslPrefix=${openssl.prefix}</argument>
997981
<argument>/p:CustomOpensslLib=${openssl.lib}</argument>
998982
<argument>/p:CustomOpensslInclude=${openssl.include}</argument>

0 commit comments

Comments
 (0)