Skip to content

Commit 34d4bba

Browse files
committed
AOCL-BLAS 5.0 Release
2 parents 7c564c7 + f3c166b commit 34d4bba

File tree

1,264 files changed

+234393
-50230
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,264 files changed

+234393
-50230
lines changed

.appveyor.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
skip_branch_with_pr: true
2+
13
environment:
24
matrix:
35
- LIB_TYPE: shared

.gitignore

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,8 +54,10 @@ GPATH
5454
GRTAGS
5555
GTAGS
5656

57-
# Windows Build
58-
build/*
57+
# cmake builds
58+
build_*/*
59+
60+
# Windows build
5961
bin/*
6062
*.dll
6163
*.lib

.travis.yml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,13 @@ matrix:
4848
CC=aarch64-linux-gnu-gcc CXX=aarch64-linux-gnu-g++ \
4949
PACKAGES="gcc-aarch64-linux-gnu g++-aarch64-linux-gnu libc6-dev-arm64-cross qemu-system-arm qemu-user" \
5050
TESTSUITE_WRAPPER="qemu-aarch64 -L /usr/aarch64-linux-gnu/"
51+
# Apple M1 (firestorm) build and fast testsuite (qemu)
52+
- os: linux
53+
compiler: aarch64-linux-gnu-gcc
54+
env: OOT=0 TEST=FAST SDE=0 THR="none" CONF="firestorm" \
55+
CC=aarch64-linux-gnu-gcc CXX=aarch64-linux-gnu-g++ \
56+
PACKAGES="gcc-aarch64-linux-gnu g++-aarch64-linux-gnu libc6-dev-arm64-cross qemu-system-arm qemu-user" \
57+
TESTSUITE_WRAPPER="qemu-aarch64 -L /usr/aarch64-linux-gnu/"
5158
# armsve build and fast testsuite (qemu)
5259
- os: linux
5360
compiler: aarch64-linux-gnu-gcc-10

CMakeLists.txt

Lines changed: 313 additions & 85 deletions
Large diffs are not rendered by default.

CMakePresets.json

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
{
2+
"version": 6,
3+
"cmakeMinimumRequired": {
4+
"major": 3,
5+
"minor": 25,
6+
"patch": 0
7+
},
8+
"include": [
9+
"build/cmake/presets/linux-make-clang.json",
10+
"build/cmake/presets/linux-make-gcc.json",
11+
"build/cmake/presets/linux-make.json",
12+
"build/cmake/presets/linux-ninja.json",
13+
"build/cmake/presets/win-msvc.json",
14+
"build/cmake/presets/win-ninja.json"
15+
]
16+
}

CREDITS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ but many others have contributed code and feedback, including
9292
Nathaniel Smith @njsmith
9393
Shaden Smith @ShadenSmith
9494
Tyler Smith @tlrmchlsmth (The University of Texas at Austin)
95+
Snehith @ArcadioN09
9596
Paul Springer @springer13 (RWTH Aachen University)
9697
Adam J. Stewart @adamjstewart (University of Illinois at Urbana-Champaign)
9798
Vladimir Sukarev

LICENSE

Lines changed: 128 additions & 42 deletions
Large diffs are not rendered by default.

Makefile

Lines changed: 63 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
# libraries.
66
#
77
# Copyright (C) 2014, The University of Texas at Austin
8-
# Copyright (C) 2022 - 2023, Advanced Micro Devices, Inc. All rights reserved.
8+
# Copyright (C) 2022 - 2024, Advanced Micro Devices, Inc. All rights reserved.
99
#
1010
# Redistribution and use in source and binary forms, with or without
1111
# modification, are permitted provided that the following conditions are
@@ -191,6 +191,13 @@ gen-obj-paths-from-src = $(foreach ch, $(1), \
191191
# directories.
192192
MK_CONFIG_OBJS := $(call gen-obj-paths-from-src,$(CONFIG_SRC_SUFS),$(MK_CONFIG_SRC),$(CONFIG_PATH),$(BASE_OBJ_CONFIG_PATH))
193193

194+
MK_KERNELS_LPGEMM_SRC := $(filter ./kernels/zen/lpgemm/%.c, $(MK_KERNELS_SRC))
195+
MK_KERNELS_LPGEMM_SRC += $(filter ./kernels/zen4/lpgemm/%.c, $(MK_KERNELS_SRC))
196+
MK_KERNELS_SRC := $(filter-out $(MK_KERNELS_LPGEMM_SRC),$(MK_KERNELS_SRC))
197+
ifeq ($(filter aocl_gemm, $(ADDON_LIST)), aocl_gemm)
198+
MK_KERNELS_LPGEMM_OBJS := $(call gen-obj-paths-from-src,$(KERNELS_SRC_SUFS),$(MK_KERNELS_LPGEMM_SRC),$(KERNELS_PATH),$(BASE_OBJ_KERNELS_PATH))
199+
endif
200+
194201
# Generate object file paths for architecture-specific kernel source code.
195202
# We target only .c, .s, and .S files. Note that MK_KERNELS_SRC is already
196203
# limited to the kernel source corresponding to the kernel sets in
@@ -220,10 +227,29 @@ MK_ADDON_KERS_SRC := $(foreach addon, $(ADDON_LIST), \
220227
$(filter $(ADDON_PATH)/$(addon)/$(KERNELS_DIR)/%, \
221228
$(MK_ADDON_SRC)) \
222229
)
230+
231+
# Generate non-kernel list for all addons except aocl_gemm
232+
# We process the aocl_gemm addon separately.
223233
MK_ADDON_OTHER_SRC := $(foreach addon, $(ADDON_LIST), \
224-
$(filter-out $(ADDON_PATH)/$(addon)/$(KERNELS_DIR)/%, \
225-
$(MK_ADDON_SRC)) \
234+
$(if $(filter-out aocl_gemm,$(addon)), \
235+
$(filter-out $(ADDON_PATH)/$(addon)/$(KERNELS_DIR)/%, \
236+
$(MK_ADDON_SRC))) \
226237
)
238+
239+
# Pick the .cpp files present in JIT folder only when both of the following conditions hold:
240+
# 1. when gcc version is older than 11.2
241+
# 2. when aocl_gemm addon is enabled.
242+
ifeq ($(filter aocl_gemm, $(ADDON_LIST)), aocl_gemm)
243+
ifeq ($(GCC_OT_11_2_0),no)
244+
MK_AOCL_GEMM_OTHER_SRC := $(filter-out $(ADDON_PATH)/$(aocl_gemm)/$(KERNELS_DIR)/%, \
245+
$(MK_ADDON_SRC))
246+
MK_ADDON_OTHER_SRC := $(filter %.c,$(MK_AOCL_GEMM_OTHER_SRC))
247+
else
248+
MK_ADDON_OTHER_SRC := $(filter-out $(ADDON_PATH)/$(aocl_gemm)/$(KERNELS_DIR)/%, \
249+
$(MK_ADDON_SRC))
250+
endif
251+
endif
252+
227253
MK_ADDON_KERS_OBJS := $(call gen-obj-paths-from-src,$(ADDON_SRC_SUFS),$(MK_ADDON_KERS_SRC),$(ADDON_PATH),$(BASE_OBJ_ADDON_PATH))
228254
MK_ADDON_OTHER_OBJS := $(call gen-obj-paths-from-src,$(ADDON_SRC_SUFS),$(MK_ADDON_OTHER_SRC),$(ADDON_PATH),$(BASE_OBJ_ADDON_PATH))
229255
MK_ADDON_OBJS := $(MK_ADDON_KERS_OBJS) $(MK_ADDON_OTHER_OBJS)
@@ -264,6 +290,10 @@ MK_BLIS_OBJS := $(MK_CONFIG_OBJS) \
264290
$(MK_ADDON_OBJS) \
265291
$(MK_SANDBOX_OBJS)
266292

293+
ifeq ($(filter aocl_gemm, $(ADDON_LIST)), aocl_gemm)
294+
MK_BLIS_OBJS += $(MK_KERNELS_LPGEMM_OBJS)
295+
endif
296+
267297
# Optionally filter out the BLAS and CBLAS compatibility layer object files.
268298
# This is not actually necessary, since each affected file is guarded by C
269299
# preprocessor macros, but it prevents "empty" object files from being
@@ -606,6 +636,19 @@ else
606636
endif
607637
endef
608638

639+
# first argument: a kernel set (name) being targeted (e.g. haswell).
640+
# second argument: the configuration whose CFLAGS we should use in compilation.
641+
# third argument: the kernel file suffix being considered.
642+
define make-kernels-lpgemm-rule
643+
$(BASE_OBJ_KERNELS_PATH)/$(1)/%.o: $(KERNELS_PATH)/$(1)/%.$(3) $(BLIS_H_FLAT) $(MAKE_DEFS_MK_PATHS)
644+
ifeq ($(ENABLE_VERBOSE),yes)
645+
$(CC) $(call get-kernel-lpgemm-cflags-for,$(2)) -c $$< -o $$@
646+
else
647+
@echo "Compiling $$@" $(call get-kernel-lpgemm-text-for,$(2))
648+
@$(CC) $(call get-kernel-lpgemm-cflags-for,$(2)) -c $$< -o $$@
649+
endif
650+
endef
651+
609652
# first argument: a configuration name from the union of config_list and
610653
# config_name, used to look up the CFLAGS to use during compilation.
611654
# second argument: the C99 addon file suffix being considered.
@@ -710,6 +753,10 @@ $(foreach conf, $(CONFIG_LIST), $(eval $(call make-refkern-rule,$(conf))))
710753
$(foreach suf, $(KERNELS_SRC_SUFS), \
711754
$(foreach kset, $(KERNEL_LIST), $(eval $(call make-kernels-rule,$(kset),$(call get-config-for-kset,$(kset)),$(suf)))))
712755

756+
ifeq ($(filter aocl_gemm, $(ADDON_LIST)), aocl_gemm)
757+
$(foreach suf, $(KERNELS_SRC_SUFS), \
758+
$(foreach kset, $(KERNEL_LIST), $(eval $(call make-kernels-lpgemm-rule,$(kset)/lpgemm,$(call get-config-for-kset,$(kset)),$(suf)))))
759+
endif
713760
# Instantiate the build rule for C addon files. Use the CFLAGS for the
714761
# configuration family.
715762
$(foreach suf, $(ADDON_C99_SUFS), \
@@ -850,20 +897,14 @@ else
850897
@$(RANLIB) $@
851898
endif
852899

853-
# first argument: the base name of the BLAS test driver.
854-
define make-blat-rule
855-
$(BASE_EXE_BLASTEST_PATH)/$(1).x: $(BASE_OBJ_BLASTEST_PATH)/$(1).o $(BLASTEST_F2C_LIB) $(LIBBLIS_LINK)
900+
$(BASE_EXE_BLASTEST_PATH)/%.x: $(BASE_OBJ_BLASTEST_PATH)/%.o $(BLASTEST_F2C_LIB) $(LIBBLIS_LINK)
856901
@mkdir -p $(BASE_EXE_BLASTEST_PATH)
857902
ifeq ($(ENABLE_VERBOSE),yes)
858-
$(LINKER) $(BASE_OBJ_BLASTEST_PATH)/$(1).o $(BLASTEST_F2C_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $$@
903+
$(LINKER) $< $(BLASTEST_F2C_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@
859904
else
860-
@echo "Linking $$(@F) against '$(notdir $(BLASTEST_F2C_LIB)) $(LIBBLIS_LINK) $(LDFLAGS)'"
861-
@$(LINKER) $(BASE_OBJ_BLASTEST_PATH)/$(1).o $(BLASTEST_F2C_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $$@
905+
@echo "Linking $@ against '$(notdir $(BLASTEST_F2C_LIB)) $(LIBBLIS_LINK) "$(LDFLAGS)"'"
906+
@$(LINKER) $< $(BLASTEST_F2C_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@
862907
endif
863-
endef
864-
865-
# Instantiate the rule above for each driver file.
866-
$(foreach name, $(BLASTEST_DRV_BASES), $(eval $(call make-blat-rule,$(name))))
867908

868909
# A rule to run ?blat1.x driver files.
869910
define make-run-blat1-rule
@@ -933,7 +974,7 @@ $(TESTSUITE_BIN): $(MK_TESTSUITE_OBJS) $(LIBBLIS_LINK)
933974
ifeq ($(ENABLE_VERBOSE),yes)
934975
$(LINKER) $(MK_TESTSUITE_OBJS) $(LIBBLIS_LINK) $(LDFLAGS) -o $@
935976
else
936-
@echo "Linking $@ against '$(LIBBLIS_LINK) $(LDFLAGS)'"
977+
@echo "Linking $@ against '$(LIBBLIS_LINK) "$(LDFLAGS)"'"
937978
@$(LINKER) $(MK_TESTSUITE_OBJS) $(LIBBLIS_LINK) $(LDFLAGS) -o $@
938979
endif
939980

@@ -1081,13 +1122,21 @@ else
10811122
$(@)/$(CONFIG_DIR)/$(CONFIG_NAME)/
10821123
endif
10831124

1125+
# Name of the BLIS library substituted for @AOCLLIB@ in the pkg-config blis.pc.in file.
1126+
ifeq ($(THREADING_MODEL),off)
1127+
AOCLLIB := blis
1128+
else
1129+
AOCLLIB := blis-mt
1130+
endif
1131+
10841132
$(PC_SHARE_DIR_INST): $(PC_IN_FILE)
10851133
$(MKDIR) $(@)
10861134
ifeq ($(ENABLE_VERBOSE),no)
10871135
@echo "Installing $(PC_OUT_FILE) into $(@)/"
10881136
endif
10891137
$(shell cat "$(PC_IN_FILE)" \
10901138
| sed -e "s#@PACKAGE_VERSION@#$(VERSION)#g" \
1139+
| sed -e "s#@AOCLLIB@#$(AOCLLIB)#g" \
10911140
| sed -e "s#@prefix@#$(prefix)#g" \
10921141
| sed -e "s#@exec_prefix@#$(exec_prefix)#g" \
10931142
| sed -e "s#@libdir@#$(libdir)#g" \

addon/CMakeLists.txt

Lines changed: 44 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,36 @@
1-
##Copyright (C) 2023, Advanced Micro Devices, Inc. All rights reserved. ##
1+
#[=[
2+
3+
BLIS
4+
An object-based framework for developing high-performance BLAS-like
5+
libraries.
6+
7+
Copyright (C) 2023 - 2024, Advanced Micro Devices, Inc. All rights reserved.
8+
9+
Redistribution and use in source and binary forms, with or without
10+
modification, are permitted provided that the following conditions are
11+
met:
12+
- Redistributions of source code must retain the above copyright
13+
notice, this list of conditions and the following disclaimer.
14+
- Redistributions in binary form must reproduce the above copyright
15+
notice, this list of conditions and the following disclaimer in the
16+
documentation and/or other materials provided with the distribution.
17+
- Neither the name(s) of the copyright holder(s) nor the names of its
18+
contributors may be used to endorse or promote products derived
19+
from this software without specific prior written permission.
20+
21+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22+
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23+
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24+
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25+
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26+
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27+
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28+
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29+
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32+
33+
]=]
234

335
# Writing a function that will be used to generate the required object
436
# libraries for the required addons.
@@ -59,17 +91,16 @@ function(generate_addon_targets addon_target)
5991
# in get-addon-c99flags-for
6092
${CADDONINCFLAGS}
6193
)
94+
6295
if(THREADING_MODEL STREQUAL "openmp")
6396
# Equivalent to CTHREADFLAGS in get-noopt-cflags-for
6497
target_link_libraries(${addon_target}_C99_ADDON PRIVATE OpenMP::OpenMP_C)
6598
elseif(THREADING_MODEL STREQUAL "pthreads")
6699
# in get-noopt-cflags-for
67100
target_compile_options(${addon_target}_C99_ADDON PRIVATE ${CTHREADFLAGS})
68101
endif()
69-
if(BUILD_SHARED_LIBS)
70-
# Equivalent to CPICFLAGS in get-noopt-cflags-for
71-
set_target_properties(${addon_target}_C99_ADDON PROPERTIES POSITION_INDEPENDENT_CODE ON)
72-
endif()
102+
# Equivalent to CPICFLAGS in get-noopt-cflags-for
103+
set_target_properties(${addon_target}_C99_ADDON PROPERTIES POSITION_INDEPENDENT_CODE ON)
73104
add_dependencies(${addon_target}_C99_ADDON flat-header)
74105
# Put all those targets under object-libs-targets folder name so that they appear all together in IDE.
75106
set_target_properties(${addon_target}_C99_ADDON PROPERTIES FOLDER object-libs-targets)
@@ -128,17 +159,17 @@ function(generate_addon_targets addon_target)
128159
# in get-noopt-cflags-for
129160
target_compile_options(${addon_target}_C99_KERNEL_ADDON PRIVATE ${CTHREADFLAGS})
130161
endif()
131-
if(BUILD_SHARED_LIBS)
132-
# Equivalent to CPICFLAGS in get-noopt-cflags-for
133-
set_target_properties(${addon_target}_C99_KERNEL_ADDON PROPERTIES POSITION_INDEPENDENT_CODE ON)
134-
endif()
162+
# Equivalent to CPICFLAGS in get-noopt-cflags-for
163+
set_target_properties(${addon_target}_C99_KERNEL_ADDON PROPERTIES POSITION_INDEPENDENT_CODE ON)
135164
add_dependencies(${addon_target}_C99_KERNEL_ADDON flat-header)
136165
# Put all those targets under object-libs-targets folder name so that they appear all together in IDE.
137166
set_target_properties(${addon_target}_C99_KERNEL_ADDON PROPERTIES FOLDER object-libs-targets)
138167
endif()
139168

140-
# Collect all subdirectory paths that have at least one file with suffix in ADDON_CXX_SUFS list.
141-
get_filepaths_with_suffixes(LOCAL_SOURCE_CXX_FILES "${CMAKE_CURRENT_SOURCE_DIR}/${addon_target}" "${ADDON_CXX_SUFS}")
169+
if(("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU") AND (CMAKE_C_COMPILER_VERSION VERSION_LESS 11.2.0))
170+
# Collect all subdirectory paths that have at least one file with suffix in ADDON_CXX_SUFS list.
171+
get_filepaths_with_suffixes(LOCAL_SOURCE_CXX_FILES "${CMAKE_CURRENT_SOURCE_DIR}/${addon_target}" "${ADDON_CXX_SUFS}")
172+
endif()
142173

143174
# Only generate the object library if there is at least one source file.
144175
list(LENGTH LOCAL_SOURCE_CXX_FILES size)
@@ -190,10 +221,8 @@ function(generate_addon_targets addon_target)
190221
# in get-noopt-cflags-for
191222
target_compile_options(${addon_target}_CXX_ADDON PRIVATE ${CTHREADFLAGS})
192223
endif()
193-
if(BUILD_SHARED_LIBS)
194-
# Equivalent to CPICFLAGS in get-noopt-cflags-for
195-
set_target_properties(${addon_target}_CXX_ADDON PROPERTIES POSITION_INDEPENDENT_CODE ON)
196-
endif()
224+
# Equivalent to CPICFLAGS in get-noopt-cflags-for
225+
set_target_properties(${addon_target}_CXX_ADDON PROPERTIES POSITION_INDEPENDENT_CODE ON)
197226
add_dependencies(${addon_target}_CXX_ADDON flat-header)
198227
# Put all those targets under object-libs-targets folder name so that they appear all together in IDE.
199228
set_target_properties(${addon_target}_CXX_ADDON PROPERTIES FOLDER object-libs-targets)

0 commit comments

Comments
 (0)