Skip to content

Commit 3ba7b1b

Browse files
iii-ifneddy
andcommitted
s390x: vectorize crc32
Use vector extensions when compiling for s390x and binutils knows about them. At runtime, check whether kernel supports vector extensions (it has to be not just the CPU, but also the kernel) and choose between the regular and the vectorized implementations. Co-authored-by: Eduard Stefes <[email protected]>
1 parent 2209f63 commit 3ba7b1b

File tree

11 files changed

+448
-2
lines changed

11 files changed

+448
-2
lines changed

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ check_include_file(unistd.h HAVE_UNISTD_H)
103103
if(MSVC)
104104
set(CMAKE_REQUIRED_FLAGS "-WX")
105105
else(MSVC)
106-
set(CMAKE_REQUIRED_FLAGS "-WError")
106+
set(CMAKE_REQUIRED_FLAGS "-Werror")
107107
endif(MSVC)
108108

109109
check_c_source_compiles(

Makefile.in

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ LDFLAGS=
2727
TEST_LIBS=-L. libz.a
2828
LDSHARED=$(CC)
2929
CPP=$(CC) -E
30+
VGFMAFLAG=
3031

3132
STATICLIB=libz.a
3233
SHAREDLIB=libz.so
@@ -164,6 +165,9 @@ adler32.o: $(SRCDIR)adler32.c
164165
crc32.o: $(SRCDIR)crc32.c
165166
$(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)crc32.c
166167

168+
crc32_vx.o: $(SRCDIR)contrib/crc32vx/crc32_vx.c
169+
$(CC) $(CFLAGS) $(VGFMAFLAG) $(ZINC) -c -o $@ $(SRCDIR)contrib/crc32vx/crc32_vx.c
170+
167171
deflate.o: $(SRCDIR)deflate.c
168172
$(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)deflate.c
169173

@@ -214,6 +218,11 @@ crc32.lo: $(SRCDIR)crc32.c
214218
$(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/crc32.o $(SRCDIR)crc32.c
215219
-@mv objs/crc32.o $@
216220

221+
crc32_vx.lo: $(SRCDIR)contrib/crc32vx/crc32_vx.c
222+
-@mkdir objs 2>/dev/null || test -d objs
223+
$(CC) $(SFLAGS) $(VGFMAFLAG) $(ZINC) -DPIC -c -o objs/crc32_vx.o $(SRCDIR)contrib/crc32vx/crc32_vx.c
224+
-@mv objs/crc32_vx.o $@
225+
217226
deflate.lo: $(SRCDIR)deflate.c
218227
-@mkdir objs 2>/dev/null || test -d objs
219228
$(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/deflate.o $(SRCDIR)deflate.c
@@ -406,6 +415,7 @@ infback.o inflate.o: $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h $(SRCDIR)inftrees.
406415
inffast.o: $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h $(SRCDIR)inftrees.h $(SRCDIR)inflate.h $(SRCDIR)inffast.h
407416
inftrees.o: $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h $(SRCDIR)inftrees.h
408417
trees.o: $(SRCDIR)deflate.h $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h $(SRCDIR)trees.h
418+
crc32_vx.o: $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h $(SRCDIR)contrib/crc32vx/crc32_vx_hooks.h
409419

410420
adler32.lo: $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h
411421
zutil.lo: $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h $(SRCDIR)gzguts.h
@@ -417,3 +427,4 @@ infback.lo inflate.lo: $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h $(SRCDIR)inftree
417427
inffast.lo: $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h $(SRCDIR)inftrees.h $(SRCDIR)inflate.h $(SRCDIR)inffast.h
418428
inftrees.lo: $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h $(SRCDIR)inftrees.h
419429
trees.lo: $(SRCDIR)deflate.h $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h $(SRCDIR)trees.h
430+
crc32_vx.lo: $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h $(SRCDIR)contrib/crc32vx/crc32_vx_hooks.h

configure

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@ memory=0
9595
undefined=0
9696
insecure=0
9797
unknown=0
98+
enable_crcvx=1
9899
old_cc="$CC"
99100
old_cflags="$CFLAGS"
100101
OBJC='$(OBJZ) $(OBJG)'
@@ -122,6 +123,7 @@ case "$1" in
122123
echo ' configure [--const] [--zprefix] [--prefix=PREFIX] [--eprefix=EXPREFIX]' | tee -a configure.log
123124
echo ' [--insecure] [--static] [--64] [--libdir=LIBDIR] [--sharedlibdir=LIBDIR]' | tee -a configure.log
124125
echo ' [--includedir=INCLUDEDIR] [--archs="-arch i386 -arch x86_64"]' | tee -a configure.log
126+
echo ' [--disable-crcvx]' | tee -a configure.log
125127
exit 0 ;;
126128
-p*=* | --prefix=*) prefix=`echo $1 | sed 's/.*=//'`; shift ;;
127129
-e*=* | --eprefix=*) exec_prefix=`echo $1 | sed 's/.*=//'`; shift ;;
@@ -150,6 +152,7 @@ case "$1" in
150152
--memory) memory=1; shift ;;
151153
--undefined) undefined=1; shift ;;
152154
--insecure) insecure=1; shift ;;
155+
--disable-crcvx) enable_crcvx=0; shift ;;
153156
*) unknown=1; echo "unknown option ignored: $1" | tee -a configure.log; shift;;
154157
esac
155158
done
@@ -888,6 +891,70 @@ EOF
888891
fi
889892
fi
890893

894+
# check for ibm s390x build
895+
HAVE_S390X=0
896+
cat > $test.c << EOF
897+
#ifndef __s390x__
898+
#error
899+
#endif
900+
EOF
901+
if try $CC -c $CFLAGS $test.c; then
902+
echo "Checking for s390x build ... Yes." | tee -a configure.log
903+
HAVE_S390X=1
904+
else
905+
echo "Checking for s390x build ... No." | tee -a configure.log
906+
fi
907+
908+
# check for ibm s390x vx vector extensions
909+
HAVE_S390X_VX=0
910+
if test $HAVE_S390X -eq 1 && test $enable_crcvx -eq 1 ; then
911+
# preset the compiler specific flags
912+
if test $clang -eq 1; then
913+
VGFMAFLAG=-fzvector
914+
else
915+
VGFMAFLAG=-mzarch
916+
fi
917+
918+
cat > $test.c <<EOF
919+
#ifndef __s390x__
920+
#error
921+
#endif
922+
#include <vecintrin.h>
923+
int main(void) {
924+
unsigned long long a __attribute__((vector_size(16))) = { 0 };
925+
unsigned long long b __attribute__((vector_size(16))) = { 0 };
926+
unsigned char c __attribute__((vector_size(16))) = { 0 };
927+
c = vec_gfmsum_accum_128(a, b, c);
928+
return c[0];
929+
}
930+
EOF
931+
932+
# cflags already contains a valid march
933+
if try $CC -c $CFLAGS $VGFMAFLAG $test.c; then
934+
echo "Checking for s390x vx vector extension ... Yes." | tee -a configure.log
935+
HAVE_S390X_VX=1
936+
# or set march for our compile units
937+
elif try $CC -c $CFLAGS $VGFMAFLAG -march=z13 $test.c; then
938+
echo "Checking for s390x vx vector extension (march=z13) ... Yes." | tee -a configure.log
939+
HAVE_S390X_VX=1
940+
VGFMAFLAG="$VGFMAFLAG -march=z13"
941+
# else the toolchain cannot build the vx code (we are on s390x, but the vx test did not compile)
942+
else
943+
echo "Checking for s390x vx vector extension ... No." | tee -a configure.log
944+
fi
945+
946+
# prepare compiling for s390x
947+
if test $HAVE_S390X_VX -eq 1; then
948+
CFLAGS="$CFLAGS -DHAVE_S390X_VX"
949+
SFLAGS="$SFLAGS -DHAVE_S390X_VX"
950+
OBJC="$OBJC crc32_vx.o"
951+
PIC_OBJC="$PIC_OBJC crc32_vx.lo"
952+
else
953+
# target has no vx extension
954+
VGFMAFLAG=""
955+
fi
956+
fi
957+
891958
# show the results in the log
892959
echo >> configure.log
893960
echo ALL = $ALL >> configure.log
@@ -919,6 +986,9 @@ echo mandir = $mandir >> configure.log
919986
echo prefix = $prefix >> configure.log
920987
echo sharedlibdir = $sharedlibdir >> configure.log
921988
echo uname = $uname >> configure.log
989+
echo HAVE_S390X = $HAVE_S390X >> configure.log
990+
echo HAVE_S390X_VX = $HAVE_S390X_VX >> configure.log
991+
echo VGFMAFLAG = $VGFMAFLAG >> configure.log
922992

923993
# update Makefile with the configure results
924994
sed < ${SRCDIR}Makefile.in "

contrib/CMakeLists.txt

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,15 @@ function(zlib_add_contrib_lib name description dir)
2424
endfunction(zlib_add_contrib_lib name description dir)
2525

2626
function(zlib_add_contrib_feature name description dir)
27+
if(ARGC EQUAL 4)
28+
set(default_on ${ARGV3})
29+
else()
30+
set(default_on Off)
31+
endif()
32+
2733
option(ZLIB_WITH_${name}
2834
"Enable build ${description}"
29-
OFF)
35+
${default_on})
3036

3137
if(ZLIB_WITH_${name})
3238
add_subdirectory(${dir}/)
@@ -38,6 +44,7 @@ zlib_add_contrib_feature("GVMAT64"
3844
gcc_gvmat64)
3945

4046
zlib_add_contrib_feature(INFBACK9 "with support for method 9 deflate" infback9)
47+
zlib_add_contrib_feature(CRC32VX "with S390X-CRC32VX implementation" crc32vx On)
4148
zlib_add_contrib_lib(ADA "Ada bindings" ada)
4249
zlib_add_contrib_lib(BLAST "blast binary" blast)
4350
zlib_add_contrib_lib(IOSTREAM3 "IOStream C++ bindings V3" iostream3)

contrib/README.contrib

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,9 @@ puff/ by Mark Adler <[email protected]>
4646
Small, low memory usage inflate. Also serves to provide an
4747
unambiguous description of the deflate format.
4848

49+
crc32vx/ by Ilya Leoshkevich <[email protected]>
50+
Hardware-accelerated CRC32 on IBM Z with Z13 VX extension.
51+
4952
testzlib/ by Gilles Vollant <[email protected]>
5053
Example of the use of zlib
5154

contrib/crc32vx/CMakeLists.txt

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
# Optional vectorized CRC32 implementation for IBM Z (s390x).
#
# When ZLIB_WITH_CRC32VX is enabled, the compiler targets s390x and the probe
# below compiles, crc32_vx.c is added to the zlib / zlibstatic targets (which
# are created outside of this directory) and HAVE_S390X_VX is defined for them.
# The extra compiler flags are attached to crc32_vx.c only, not to the whole
# target.

# check if we compile for IBM s390x
#
check_c_source_compiles("
#ifndef __s390x__
#error
#endif
int main() {return 0;}
" HAS_S390X_SUPPORT)

#
# Check for IBM S390X - VX extensions
#
if(ZLIB_WITH_CRC32VX AND HAS_S390X_SUPPORT)
    # preset the compiler specific flags
    if(CMAKE_C_COMPILER_ID STREQUAL "Clang")
        set(VGFMAFLAG "-fzvector")
    else()
        set(VGFMAFLAG "-mzarch")
    endif()

    # Probe program: compiles only if <vecintrin.h> and the
    # vec_gfmsum_accum_128 intrinsic are usable with the flags under test.
    set(S390X_VX_TEST
"#ifndef __s390x__ \n\
#error \n\
#endif \n\
#include <vecintrin.h> \n\
int main(void) { \
unsigned long long a __attribute__((vector_size(16))) = { 0 }; \
unsigned long long b __attribute__((vector_size(16))) = { 0 }; \
unsigned char c __attribute__((vector_size(16))) = { 0 }; \
c = vec_gfmsum_accum_128(a, b, c); \
return c[0]; \
}")

    # cflags already contains a valid march
    set(CMAKE_REQUIRED_FLAGS "${VGFMAFLAG}")
    check_c_source_compiles("${S390X_VX_TEST}" HAS_S390X_VX_SUPPORT)
    unset(CMAKE_REQUIRED_FLAGS)

    # or set march for our compile units
    if(NOT HAS_S390X_VX_SUPPORT)
        set(CMAKE_REQUIRED_FLAGS "${VGFMAFLAG} -march=z13")
        check_c_source_compiles("${S390X_VX_TEST}" HAS_Z13_S390X_VX_SUPPORT)
        unset(CMAKE_REQUIRED_FLAGS)

        # Only request z13 code generation when the probe confirmed that it
        # is what makes the intrinsic available.
        if(HAS_Z13_S390X_VX_SUPPORT)
            list(APPEND VGFMAFLAG "-march=z13")
        endif()
    endif()

    # prepare compiling for s390x
    if(HAS_S390X_VX_SUPPORT OR HAS_Z13_S390X_VX_SUPPORT)
        # Targets that actually received crc32_vx.c; needed below to make the
        # per-source compile options visible in their directory scope.
        set(crc32vx_targets "")

        if(ZLIB_BUILD_SHARED)
            target_sources(zlib
                PRIVATE
                    crc32_vx.c
                    crc32_vx_hooks.h)
            target_compile_definitions(zlib PUBLIC -DHAVE_S390X_VX=1)
            list(APPEND crc32vx_targets zlib)
        endif()

        if(ZLIB_BUILD_STATIC)
            target_sources(zlibstatic
                PRIVATE
                    crc32_vx.c
                    crc32_vx_hooks.h)
            target_compile_definitions(zlibstatic PUBLIC -DHAVE_S390X_VX=1)
            list(APPEND crc32vx_targets zlibstatic)
        endif()

        # Source file properties are only visible to targets created in the
        # directory where the property is set. The zlib targets live in
        # another directory, so the property has to be published into their
        # scope via TARGET_DIRECTORY (available since CMake 3.18); otherwise
        # the vector flags never reach the compiler command line.
        if(crc32vx_targets AND NOT CMAKE_VERSION VERSION_LESS 3.18)
            set_source_files_properties(
                "${CMAKE_CURRENT_SOURCE_DIR}/crc32_vx.c"
                TARGET_DIRECTORY ${crc32vx_targets}
                PROPERTIES COMPILE_OPTIONS "${VGFMAFLAG}")
        else()
            # Older CMake: keep the previous behaviour (current directory
            # scope only).
            set_source_files_properties(
                "${CMAKE_CURRENT_SOURCE_DIR}/crc32_vx.c"
                PROPERTIES COMPILE_OPTIONS "${VGFMAFLAG}")
        endif()
    endif()
endif()

contrib/crc32vx/README

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
IBM Z mainframes starting from version z13 provide vector instructions, which
2+
allow vectorization of crc32. This extension is built by default when targeting
3+
IBM s390x. However, this extension can be disabled if desired:
4+
5+
# for configure build
6+
$ ./configure --disable-crcvx
7+
8+
# for cmake build
9+
$ cmake .. -DZLIB_WITH_CRC32VX=off

0 commit comments

Comments
 (0)