diff --git a/blis.pc b/blis.pc new file mode 100644 index 000000000..3c3f71861 --- /dev/null +++ b/blis.pc @@ -0,0 +1,11 @@ +prefix=/home/gceccolini/blis_sg2042 +exec_prefix=/home/gceccolini/blis_sg2042 +libdir=/home/gceccolini/blis_sg2042/lib +includedir=/home/gceccolini/blis_sg2042/include + +Name: BLIS +Description: BLAS-like Library Instantiation Software Framework +Version: 0.9.0-180 +Libs: -L${libdir} -lblis +Libs.private: -lm -lpthread -latomic +Cflags: -I${includedir}/blis diff --git a/config/rvv_sg2042/bli_cntx_init_rvv_sg2042.c b/config/rvv_sg2042/bli_cntx_init_rvv_sg2042.c new file mode 100644 index 000000000..968446d42 --- /dev/null +++ b/config/rvv_sg2042/bli_cntx_init_rvv_sg2042.c @@ -0,0 +1,115 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include "../../kernels/rvv_sg2042/3/bli_rvv_sg2042_utils.h" + +void bli_cntx_init_rvv_sg2042( cntx_t* cntx ) +{ + printf("!!!SONO LA NUOVA SUB-CONFIGURAZIONE!!!\n"); + blksz_t blkszs[ BLIS_NUM_BLKSZS ]; + + // Set default kernel blocksizes and functions. + bli_cntx_init_rvv_sg2042_ref( cntx ); + + // ------------------------------------------------------------------------- + + // A reasonable assumptions for application cores is VLEN >= 128 bits, i.e., + // v >= 4. Embedded cores, however, may implement the minimal configuration, + // which allows VLEN = 32 bits. Here, we assume VLEN >= 128 and otherwise + // fall back to the reference kernels. + const uint32_t v = get_vlenb() / sizeof(float); + + if ( v >= 4 ) + { + const uint32_t mr_s = 4 * v; + const uint32_t mr_d = 2 * v; + const uint32_t mr_c = 2 * v; + const uint32_t mr_z = v; + + // TODO: Register different kernels based on the value + // of v to avoid MC becoming too big. (e.g. 2vx8) + + // Update the context with optimized native gemm micro-kernels. + bli_cntx_set_ukrs + ( + cntx, + + // level-3 + BLIS_GEMM_UKR, BLIS_FLOAT, bli_sgemm_rvv_sg2042_4vx4, + BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_rvv_sg2042_4vx4, + BLIS_GEMM_UKR, BLIS_SCOMPLEX, bli_cgemm_rvv_sg2042_4vx4, + BLIS_GEMM_UKR, BLIS_DCOMPLEX, bli_zgemm_rvv_sg2042_4vx4, + + BLIS_VA_END + ); + + // Update the context with storage preferences. 
+ bli_cntx_set_ukr_prefs + ( + cntx, + + // level-3 + BLIS_GEMM_UKR_ROW_PREF, BLIS_FLOAT, FALSE, + BLIS_GEMM_UKR_ROW_PREF, BLIS_DOUBLE, FALSE, + BLIS_GEMM_UKR_ROW_PREF, BLIS_SCOMPLEX, FALSE, + BLIS_GEMM_UKR_ROW_PREF, BLIS_DCOMPLEX, FALSE, + + BLIS_VA_END + ); + + // Initialize level-3 blocksize objects with architecture-specific values. + // s d c z + bli_blksz_init_easy( &blkszs[ BLIS_MR ], mr_s, mr_d, mr_c, mr_z ); + bli_blksz_init_easy( &blkszs[ BLIS_NR ], 4, 4, 4, 4 ); + bli_blksz_init_easy( &blkszs[ BLIS_MC ], 20*mr_s, 20*mr_d, 60*mr_c, 30*mr_z ); + bli_blksz_init_easy( &blkszs[ BLIS_KC ], 640, 320, 320, 160 ); + bli_blksz_init_easy( &blkszs[ BLIS_NC ], 3072, 3072, 3072, 3072 ); + + // Update the context with the current architecture's register and cache + // blocksizes (and multiples) for native execution. + bli_cntx_set_blkszs + ( + cntx, + + // level-3 + BLIS_NC, &blkszs[ BLIS_NC ], BLIS_NR, + BLIS_KC, &blkszs[ BLIS_KC ], BLIS_KR, + BLIS_MC, &blkszs[ BLIS_MC ], BLIS_MR, + BLIS_NR, &blkszs[ BLIS_NR ], BLIS_NR, + BLIS_MR, &blkszs[ BLIS_MR ], BLIS_MR, + + BLIS_VA_END + ); + } +} diff --git a/config/rvv_sg2042/bli_kernel_defs_rvv_sg2042.h b/config/rvv_sg2042/bli_kernel_defs_rvv_sg2042.h new file mode 100644 index 000000000..18ca4030e --- /dev/null +++ b/config/rvv_sg2042/bli_kernel_defs_rvv_sg2042.h @@ -0,0 +1,42 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2022, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +//#ifndef BLIS_KERNEL_DEFS_H +//#define BLIS_KERNEL_DEFS_H + + +// -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- + + +//#endif diff --git a/config/rvv_sg2042/make_defs.mk b/config/rvv_sg2042/make_defs.mk new file mode 100644 index 000000000..0244f38ea --- /dev/null +++ b/config/rvv_sg2042/make_defs.mk @@ -0,0 +1,105 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2014, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. 
+# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration and add it to the +# running list of configurations included by common.mk. +THIS_CONFIG := rvv_sg2042 +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. 
+CPPROCFLAGS := -DRISCV_SIZE=64 + +#RISCV_ARCH := $(shell $(CC) -DFORCE_RISCV_VECTOR -E build/detect/riscv/bli_riscv_detect_arch.h | grep '^[^\#]') +#RISCV_ABI := $(shell $(CC) -DFORCE_RISCV_VECTOR -E build/detect/riscv/bli_riscv_detect_abi.h | grep '^[^\#]') +RISCV_ARCH := rv64gc_zfh_xtheadvector +RISCV_ABI := lp64d + +ifeq (,$(findstring 64,$(RISCV_ARCH))) +$(error The RISC-V compiler architecture $(RISCV_ARCH) is not compatible with $(THIS_CONFIG)) +else ifeq (,$(findstring 64,$(RISCV_ABI))) +$(error The RISC-V compiler ABI $(RISCV_ABI) is not compatible with $(THIS_CONFIG)) +endif + +CMISCFLAGS := -march=$(RISCV_ARCH) -mabi=$(RISCV_ABI) +CPICFLAGS := -fPIC +CWARNFLAGS := -Wall -Wno-unused-function -Wfatal-errors + +# In case the A extension is not available +LDFLAGS += -latomic + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O2 -ftree-vectorize +endif + +# Flags specific to optimized kernels. +CKOPTFLAGS := $(COPTFLAGS) -O3 +ifeq ($(CC_VENDOR),gcc) +CKVECFLAGS := +else +ifeq ($(CC_VENDOR),clang) +CKVECFLAGS := +else +$(error gcc or clang is required for this configuration.) +endif +endif + +# Flags specific to reference kernels. +CROPTFLAGS := $(CKOPTFLAGS) +ifeq ($(CC_VENDOR),gcc) +# Lower compiler optimization. cinvscalv fails at -O1 +CRVECFLAGS := $(CKVECFLAGS) -O0 +else +ifeq ($(CC_VENDOR),clang) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +CRVECFLAGS := $(CKVECFLAGS) +endif +endif + +# Store all of the variables here to new variables containing the +# configuration name. +$(eval $(call store-make-defs,$(THIS_CONFIG))) diff --git a/config_registry b/config_registry index 815439348..f30c7d835 100644 --- a/config_registry +++ b/config_registry @@ -55,12 +55,13 @@ power10: power10 power9: power9 bgq: bgq -# RISC-V architectures. +# RISC-V architectures. Added! 
rv32i: rv32i/rvi rv64i: rv64i/rvi rv32iv: rv32iv/rviv rv64iv: rv64iv/rviv - +rvv_sg2042: rvv_sg2042 + # SiFive architectures. sifive_rvv: sifive_rvv sifive_x280: sifive_x280/sifive_rvv diff --git a/frame/base/bli_arch.c b/frame/base/bli_arch.c index 53d9bdefd..8f37b1d86 100644 --- a/frame/base/bli_arch.c +++ b/frame/base/bli_arch.c @@ -285,6 +285,9 @@ arch_t bli_arch_query_id_impl( void ) #ifdef BLIS_FAMILY_RV64IV id = BLIS_ARCH_RV64IV; #endif + #ifdef BLIS_FAMILY_RVV_SG2042 // added! + id = BLIS_ARCH_RVV_SG2042; + #endif // SiFive microarchitectures. #ifdef BLIS_FAMILY_SIFIVE_RVV @@ -358,6 +361,7 @@ static const char* config_name[ BLIS_NUM_ARCHS ] = "rv64i", "rv32iv", "rv64iv", + "rvv_sg2042", // added! "sifive_rvv", "sifive_x280", diff --git a/frame/include/bli_arch_config.h b/frame/include/bli_arch_config.h index 49a894302..721607397 100644 --- a/frame/include/bli_arch_config.h +++ b/frame/include/bli_arch_config.h @@ -277,7 +277,9 @@ INSERT_GENTCONF #ifdef BLIS_KERNELS_RVIV #include "bli_kernels_rviv.h" #endif - +#ifdef BLIS_KERNELS_RVV_SG2042 +#include "bli_kernels_rvv_sg2042.h" +#endif // -- SiFive RISC-V architectures -- #ifdef BLIS_KERNELS_SIFIVE_RVV diff --git a/frame/include/bli_gentconf_macro_defs.h b/frame/include/bli_gentconf_macro_defs.h index f6f3af20e..98ad7e65c 100644 --- a/frame/include/bli_gentconf_macro_defs.h +++ b/frame/include/bli_gentconf_macro_defs.h @@ -219,7 +219,11 @@ #else #define INSERT_GENTCONF_RV64IV #endif - +#ifdef BLIS_CONFIG_RVV_SG2042 // added! 
+#define INSERT_GENTCONF_RVV_SG2042 GENTCONF( RVV_SG2042, rvv_sg2042 ) +#else +#define INSERT_GENTCONF_RVV_SG2042 +#endif // -- SiFive architectures ---------------------------------------------------- #ifdef BLIS_CONFIG_SIFIVE_RVV @@ -284,6 +288,7 @@ INSERT_GENTCONF_RV32I \ INSERT_GENTCONF_RV64I \ INSERT_GENTCONF_RV32IV \ INSERT_GENTCONF_RV64IV \ +INSERT_GENTCONF_RVV_SG2042 \ \ INSERT_GENTCONF_SIFIVE_RVV \ INSERT_GENTCONF_SIFIVE_X280 \ diff --git a/frame/include/bli_type_defs.h b/frame/include/bli_type_defs.h index 890d216ea..809b44dd9 100644 --- a/frame/include/bli_type_defs.h +++ b/frame/include/bli_type_defs.h @@ -1005,6 +1005,7 @@ typedef enum BLIS_ARCH_RV64I, BLIS_ARCH_RV32IV, BLIS_ARCH_RV64IV, + BLIS_ARCH_RVV_SG2042, // added! // SiFive BLIS_ARCH_SIFIVE_RVV, diff --git a/include/blis.h b/include/blis.h new file mode 100644 index 000000000..999edb6a5 --- /dev/null +++ b/include/blis.h @@ -0,0 +1 @@ +#include diff --git a/include/cblas.h b/include/cblas.h new file mode 100644 index 000000000..f9ab36872 --- /dev/null +++ b/include/cblas.h @@ -0,0 +1 @@ +#include diff --git a/kernels/rvv_sg2042/3/bli_cgemm_rvv_sg2042_4vx4.c b/kernels/rvv_sg2042/3/bli_cgemm_rvv_sg2042_4vx4.c new file mode 100644 index 000000000..dd814fcbd --- /dev/null +++ b/kernels/rvv_sg2042/3/bli_cgemm_rvv_sg2042_4vx4.c @@ -0,0 +1,79 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2023, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+    - Neither the name(s) of the copyright holder(s) nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+*/
+
+#include "bli_rvv_sg2042_utils.h"
+// Assembly scomplex gemm kernel; only declared here (REALNAME in bli_cgemm_rvv_sg2042_asm_4vx4.S).
+void bli_cgemm_rvv_sg2042_asm_4vx4
+     (
+       intptr_t    k,
+       const void* alpha,
+       const void* a,
+       const void* b,
+       const void* beta,
+       void*       c, intptr_t rs_c, intptr_t cs_c // the caller below passes both as byte counts
+     );
+// BLIS-facing scomplex gemm micro-kernel: checks its assumptions, then forwards to the assembly kernel.
+void bli_cgemm_rvv_sg2042_4vx4
+     (
+             dim_t      m,     // not named in this body; presumably consumed by GEMM_UKR_SETUP_CT - TODO confirm
+             dim_t      n,     // not named in this body; presumably consumed by GEMM_UKR_SETUP_CT - TODO confirm
+             dim_t      k,     // passed to the assembly kernel
+       const void*      alpha, // passed to the assembly kernel
+       const void*      a,     // passed to the assembly kernel
+       const void*      b,     // passed to the assembly kernel
+       const void*      beta,  // passed to the assembly kernel
+             void*      c, inc_t rs_c, inc_t cs_c, // rs_c is asserted to be 1; cs_c is scaled to bytes below
+       const auxinfo_t* data,  // not named in this body
+       const cntx_t*    cntx   // queried for the scomplex MR blocksize
+     )
+{
+    // The assembly kernels always take native machine-sized integer arguments.
+    // dim_t and inc_t are normally defined as being machine-sized. If larger, fail at compile time.
+    bli_static_assert( sizeof(dim_t) <= sizeof(intptr_t) &&
+                       sizeof(inc_t) <= sizeof(intptr_t) );
+
+    // mr is read from the context (it depends on the vector length); nr is the literal 4 of the 4vx4 kernel.
+    const inc_t mr = bli_cntx_get_blksz_def_dt( BLIS_SCOMPLEX, BLIS_MR, cntx );
+    const inc_t nr = 4;
+    // NOTE(review): GEMM_UKR_SETUP_CT / GEMM_UKR_FLUSH_CT are defined outside this file; they bracket the kernel call.
+    GEMM_UKR_SETUP_CT( c, mr, nr, false );
+
+    // The kernel assumes rs_c == 1, and the context should not deviate from it.
+    assert( rs_c == 1 );
+    // The kernel's rs_c slot receives get_vlenb() * 2 (not the rs_c parameter); its cs_c slot receives cs_c in bytes.
+    bli_cgemm_rvv_sg2042_asm_4vx4( k, alpha, a, b, beta, c,
+                                   get_vlenb() * 2, cs_c * sizeof(scomplex) );
+
+    GEMM_UKR_FLUSH_CT( c );
+}
diff --git a/kernels/rvv_sg2042/3/bli_cgemm_rvv_sg2042_asm_4vx4.S b/kernels/rvv_sg2042/3/bli_cgemm_rvv_sg2042_asm_4vx4.S
new file mode 100644
index 000000000..212285318
--- /dev/null
+++ b/kernels/rvv_sg2042/3/bli_cgemm_rvv_sg2042_asm_4vx4.S
@@ -0,0 +1,45 @@
+/*
+
+   BLIS
+   An object-based framework for developing high-performance BLAS-like
+   libraries.
+
+   Copyright (C) 2023, The University of Texas at Austin
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+    - Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    - Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    - Neither the name(s) of the copyright holder(s) nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +*/ + +#define REALNAME bli_cgemm_rvv_sg2042_asm_4vx4 +#define DATASIZE 8 +#define VTYPE e32 +#define FLOAD flw +#define FZERO(fr) fcvt.s.w fr, x0 +#define FEQ feq.s +#define VLE th.vlseg2w.v +#define VSE th.vsseg2w.v + +#include "bli_czgemm_rvv_sg2042_asm_4vx4.h" diff --git a/kernels/rvv_sg2042/3/bli_czgemm_rvv_sg2042_asm_4vx4.h b/kernels/rvv_sg2042/3/bli_czgemm_rvv_sg2042_asm_4vx4.h new file mode 100644 index 000000000..0b873c547 --- /dev/null +++ b/kernels/rvv_sg2042/3/bli_czgemm_rvv_sg2042_asm_4vx4.h @@ -0,0 +1,801 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2023, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +*/ + + .text + .align 2 + .global REALNAME + +// void REALNAME(intptr_t k, void* alpha, void* a, void* b, +// void* beta, void* c, intptr_t rs_c, intptr_t cs_c) +// +// register arguments: +// a0 k +// a1 alpha +// a2 a +// a3 b +// a4 beta +// a5 c +// a6 rs_c +// a7 cs_c +// + +#define REALSIZE (DATASIZE/2) + +#define loop_counter a0 + +#define A00_ptr a2 +#define A10_ptr t0 +#define A01_ptr t1 +#define A11_ptr t2 + +#define B_row_ptr a3 + +#define C00_ptr a5 +#define C01_ptr t3 +#define C02_ptr t4 +#define C03_ptr t5 +#define C10_ptr s1 +#define C11_ptr s2 +#define C12_ptr s3 +#define C13_ptr s4 + +#define tmp t6 + +#define ALPHA_re fa0 +#define ALPHA_im fa1 +#define BETA_re fa2 +#define BETA_im fa3 + +#define B00_re fa4 +#define B00_im fa5 +#define B01_re fa6 +#define B01_im fa7 +#define B02_re fa0 +#define B02_im fa1 +#define B03_re fa2 +#define B03_im fa3 + +#define B10_re ft0 +#define B10_im ft1 +#define B11_re ft2 +#define B11_im ft3 +#define B12_re ft4 +#define B12_im ft5 +#define B13_re ft6 +#define B13_im ft7 + +#define fzero ft8 + +#define A00_re v24 +#define A00_im v25 +#define A10_re v26 +#define A10_im v27 +#define A01_re v28 +#define A01_im v29 +#define A11_re v30 +#define A11_im 
v31 + +#define C0_re v24 +#define C0_im v25 +#define C1_re v26 +#define C1_im v27 +#define C2_re v28 +#define C2_im v29 +#define C3_re v30 +#define C3_im v31 + +#define AB00_re v0 +#define AB00_im v1 +#define AB01_re v2 +#define AB01_im v3 +#define AB02_re v4 +#define AB02_im v5 +#define AB03_re v6 +#define AB03_im v7 +#define AB10_re v8 +#define AB10_im v9 +#define AB11_re v10 +#define AB11_im v11 +#define AB12_re v12 +#define AB12_im v13 +#define AB13_re v14 +#define AB13_im v15 + +#define tmp0_re v16 +#define tmp0_im v17 +#define tmp1_re v18 +#define tmp1_im v19 +#define tmp2_re v20 +#define tmp2_im v21 +#define tmp3_re v22 +#define tmp3_im v23 + +#define rs_c a6 +#define cs_c a7 + +REALNAME: + #include "rvv_sg2042_save_registers.h" + + th.vsetvli s0, zero, VTYPE, m1 + csrr s0, vlenb + slli s0, s0, 1 + FZERO(fzero) + + // Set up pointers + add C01_ptr, C00_ptr, cs_c + add C02_ptr, C01_ptr, cs_c + add C03_ptr, C02_ptr, cs_c + add C10_ptr, C00_ptr, rs_c + add C11_ptr, C01_ptr, rs_c + add C12_ptr, C02_ptr, rs_c + add C13_ptr, C03_ptr, rs_c + + // Zero-initialize accumulators + th.vxor.vv AB00_re, AB00_re, AB00_re + th.vxor.vv AB00_im, AB00_im, AB00_im + th.vxor.vv AB01_re, AB01_re, AB01_re + th.vxor.vv AB01_im, AB01_im, AB01_im + th.vxor.vv AB02_re, AB02_re, AB02_re + th.vxor.vv AB02_im, AB02_im, AB02_im + th.vxor.vv AB03_re, AB03_re, AB03_re + th.vxor.vv AB03_im, AB03_im, AB03_im + th.vxor.vv AB10_re, AB10_re, AB10_re + th.vxor.vv AB10_im, AB10_im, AB10_im + th.vxor.vv AB11_re, AB11_re, AB11_re + th.vxor.vv AB11_im, AB11_im, AB11_im + th.vxor.vv AB12_re, AB12_re, AB12_re + th.vxor.vv AB12_im, AB12_im, AB12_im + th.vxor.vv AB13_re, AB13_re, AB13_re + th.vxor.vv AB13_im, AB13_im, AB13_im + + // Handle k == 0 + beqz loop_counter, MULTIPLYBETA + + add A10_ptr, A00_ptr, s0 + slli s0, s0, 1 // length of a column of A in bytes + add A01_ptr, A00_ptr, s0 + add A11_ptr, A10_ptr, s0 + + li tmp, 3 + ble loop_counter, tmp, TAIL_UNROLL_2 + + // Preload A and B + // Load and 
deinterleave A(:,l) + VLE A00_re, (A00_ptr) + VLE A10_re, (A10_ptr) + + // Load B(l,0:3) + FLOAD B00_re, 0*REALSIZE(B_row_ptr) + FLOAD B00_im, 1*REALSIZE(B_row_ptr) + FLOAD B01_re, 2*REALSIZE(B_row_ptr) + FLOAD B01_im, 3*REALSIZE(B_row_ptr) + FLOAD B02_re, 4*REALSIZE(B_row_ptr) + FLOAD B02_im, 5*REALSIZE(B_row_ptr) + FLOAD B03_re, 6*REALSIZE(B_row_ptr) + FLOAD B03_im, 7*REALSIZE(B_row_ptr) + + // Load and deinterleave A(:,l+1) + VLE A01_re, (A01_ptr) + VLE A11_re, (A11_ptr) + +LOOP_UNROLL_4: // loop_counter >= 4 + addi loop_counter, loop_counter, -4 + + th.vfmacc.vf AB00_re, B00_re, A00_re // AB(:,0) += A(:,l) * B(l,0) + th.vfnmsac.vf AB00_re, B00_im, A00_im + th.vfmacc.vf AB00_im, B00_re, A00_im + th.vfmacc.vf AB00_im, B00_im, A00_re + th.vfmacc.vf AB10_re, B00_re, A10_re + th.vfnmsac.vf AB10_re, B00_im, A10_im + th.vfmacc.vf AB10_im, B00_re, A10_im + th.vfmacc.vf AB10_im, B00_im, A10_re + + th.vfmacc.vf AB01_re, B01_re, A00_re // AB(:,1) += A(:,l) * B(l,1) + th.vfnmsac.vf AB01_re, B01_im, A00_im + th.vfmacc.vf AB01_im, B01_re, A00_im + th.vfmacc.vf AB01_im, B01_im, A00_re + th.vfmacc.vf AB11_re, B01_re, A10_re + th.vfnmsac.vf AB11_re, B01_im, A10_im + th.vfmacc.vf AB11_im, B01_re, A10_im + th.vfmacc.vf AB11_im, B01_im, A10_re + + // Point to A(:,l+2), A(:,l+3) + add A00_ptr, A01_ptr, s0 + add A10_ptr, A11_ptr, s0 + add A01_ptr, A00_ptr, s0 + add A11_ptr, A10_ptr, s0 + + // Load B(l+1,0:3) + FLOAD B10_re, 8*REALSIZE(B_row_ptr) + FLOAD B10_im, 9*REALSIZE(B_row_ptr) + FLOAD B11_re, 10*REALSIZE(B_row_ptr) + FLOAD B11_im, 11*REALSIZE(B_row_ptr) + FLOAD B12_re, 12*REALSIZE(B_row_ptr) + FLOAD B12_im, 13*REALSIZE(B_row_ptr) + FLOAD B13_re, 14*REALSIZE(B_row_ptr) + FLOAD B13_im, 15*REALSIZE(B_row_ptr) + addi B_row_ptr, B_row_ptr, 16*REALSIZE + + th.vfmacc.vf AB00_re, B10_re, A01_re // AB(:,0) += A(:,l+1) * B(l+1,0) + th.vfnmsac.vf AB00_re, B10_im, A01_im + th.vfmacc.vf AB00_im, B10_re, A01_im + th.vfmacc.vf AB00_im, B10_im, A01_re + th.vfmacc.vf AB10_re, B10_re, A11_re + 
th.vfnmsac.vf AB10_re, B10_im, A11_im + th.vfmacc.vf AB10_im, B10_re, A11_im + th.vfmacc.vf AB10_im, B10_im, A11_re + + th.vfmacc.vf AB02_re, B02_re, A00_re // AB(:,2) += A(:,l) * B(l,2) + th.vfnmsac.vf AB02_re, B02_im, A00_im + th.vfmacc.vf AB02_im, B02_re, A00_im + th.vfmacc.vf AB02_im, B02_im, A00_re + th.vfmacc.vf AB12_re, B02_re, A10_re + th.vfnmsac.vf AB12_re, B02_im, A10_im + th.vfmacc.vf AB12_im, B02_re, A10_im + th.vfmacc.vf AB12_im, B02_im, A10_re + + th.vfmacc.vf AB03_re, B03_re, A00_re // AB(:,3) += A(:,l) * B(l,3) + th.vfnmsac.vf AB03_re, B03_im, A00_im + th.vfmacc.vf AB03_im, B03_re, A00_im + th.vfmacc.vf AB03_im, B03_im, A00_re + th.vfmacc.vf AB13_re, B03_re, A10_re + th.vfnmsac.vf AB13_re, B03_im, A10_im + th.vfmacc.vf AB13_im, B03_re, A10_im + th.vfmacc.vf AB13_im, B03_im, A10_re + + // Load and deinterleave A(:,l+2) + VLE A00_re, (A00_ptr) + VLE A10_re, (A10_ptr) + + // Load B(l+2, 0:3) + FLOAD B00_re, 0*REALSIZE(B_row_ptr) + FLOAD B00_im, 1*REALSIZE(B_row_ptr) + FLOAD B01_re, 2*REALSIZE(B_row_ptr) + FLOAD B01_im, 3*REALSIZE(B_row_ptr) + FLOAD B02_re, 4*REALSIZE(B_row_ptr) + FLOAD B02_im, 5*REALSIZE(B_row_ptr) + FLOAD B03_re, 6*REALSIZE(B_row_ptr) + FLOAD B03_im, 7*REALSIZE(B_row_ptr) + + th.vfmacc.vf AB01_re, B11_re, A01_re // AB(:,1) += A(:,l+1) * B(l+1,1) + th.vfnmsac.vf AB01_re, B11_im, A01_im + th.vfmacc.vf AB01_im, B11_re, A01_im + th.vfmacc.vf AB01_im, B11_im, A01_re + th.vfmacc.vf AB11_re, B11_re, A11_re + th.vfnmsac.vf AB11_re, B11_im, A11_im + th.vfmacc.vf AB11_im, B11_re, A11_im + th.vfmacc.vf AB11_im, B11_im, A11_re + + th.vfmacc.vf AB02_re, B12_re, A01_re // AB(:,2) += A(:,l+1) * B(l+1,2) + th.vfnmsac.vf AB02_re, B12_im, A01_im + th.vfmacc.vf AB02_im, B12_re, A01_im + th.vfmacc.vf AB02_im, B12_im, A01_re + th.vfmacc.vf AB12_re, B12_re, A11_re + th.vfnmsac.vf AB12_re, B12_im, A11_im + th.vfmacc.vf AB12_im, B12_re, A11_im + th.vfmacc.vf AB12_im, B12_im, A11_re + + th.vfmacc.vf AB03_re, B13_re, A01_re // AB(:,3) += A(:,l+1) * B(l+1,3) + 
th.vfnmsac.vf AB03_re, B13_im, A01_im + th.vfmacc.vf AB03_im, B13_re, A01_im + th.vfmacc.vf AB03_im, B13_im, A01_re + th.vfmacc.vf AB13_re, B13_re, A11_re + th.vfnmsac.vf AB13_re, B13_im, A11_im + th.vfmacc.vf AB13_im, B13_re, A11_im + th.vfmacc.vf AB13_im, B13_im, A11_re + + // Load and deinterleave A(:,l+3) + VLE A01_re, (A01_ptr) + VLE A11_re, (A11_ptr) + + // Point to A(:,l+2), A(:,l+3) + add A00_ptr, A01_ptr, s0 + add A10_ptr, A11_ptr, s0 + add A01_ptr, A00_ptr, s0 + add A11_ptr, A10_ptr, s0 + + // Load B(l+3, 0:3) + FLOAD B10_re, 8*REALSIZE(B_row_ptr) + FLOAD B10_im, 9*REALSIZE(B_row_ptr) + FLOAD B11_re, 10*REALSIZE(B_row_ptr) + FLOAD B11_im, 11*REALSIZE(B_row_ptr) + FLOAD B12_re, 12*REALSIZE(B_row_ptr) + FLOAD B12_im, 13*REALSIZE(B_row_ptr) + FLOAD B13_re, 14*REALSIZE(B_row_ptr) + FLOAD B13_im, 15*REALSIZE(B_row_ptr) + addi B_row_ptr, B_row_ptr, 16*REALSIZE + + th.vfmacc.vf AB00_re, B00_re, A00_re // AB(:,0) += A(:,l+2) * B(l+2,0) + th.vfnmsac.vf AB00_re, B00_im, A00_im + th.vfmacc.vf AB00_im, B00_re, A00_im + th.vfmacc.vf AB00_im, B00_im, A00_re + th.vfmacc.vf AB10_re, B00_re, A10_re + th.vfnmsac.vf AB10_re, B00_im, A10_im + th.vfmacc.vf AB10_im, B00_re, A10_im + th.vfmacc.vf AB10_im, B00_im, A10_re + + th.vfmacc.vf AB00_re, B10_re, A01_re // AB(:,0) += A(:,l+3) * B(l+3,0) + th.vfnmsac.vf AB00_re, B10_im, A01_im + th.vfmacc.vf AB00_im, B10_re, A01_im + th.vfmacc.vf AB00_im, B10_im, A01_re + th.vfmacc.vf AB10_re, B10_re, A11_re + th.vfnmsac.vf AB10_re, B10_im, A11_im + th.vfmacc.vf AB10_im, B10_re, A11_im + th.vfmacc.vf AB10_im, B10_im, A11_re + + th.vfmacc.vf AB01_re, B01_re, A00_re // AB(:,1) += A(:,l+2) * B(l+2,1) + th.vfnmsac.vf AB01_re, B01_im, A00_im + th.vfmacc.vf AB01_im, B01_re, A00_im + th.vfmacc.vf AB01_im, B01_im, A00_re + th.vfmacc.vf AB11_re, B01_re, A10_re + th.vfnmsac.vf AB11_re, B01_im, A10_im + th.vfmacc.vf AB11_im, B01_re, A10_im + th.vfmacc.vf AB11_im, B01_im, A10_re + + th.vfmacc.vf AB01_re, B11_re, A01_re // AB(:,1) += A(:,l+3) * 
B(l+3,1) + th.vfnmsac.vf AB01_re, B11_im, A01_im + th.vfmacc.vf AB01_im, B11_re, A01_im + th.vfmacc.vf AB01_im, B11_im, A01_re + th.vfmacc.vf AB11_re, B11_re, A11_re + th.vfnmsac.vf AB11_re, B11_im, A11_im + th.vfmacc.vf AB11_im, B11_re, A11_im + th.vfmacc.vf AB11_im, B11_im, A11_re + + th.vfmacc.vf AB02_re, B02_re, A00_re // AB(:,2) += A(:,l+2) * B(l+2,2) + th.vfnmsac.vf AB02_re, B02_im, A00_im + th.vfmacc.vf AB02_im, B02_re, A00_im + th.vfmacc.vf AB02_im, B02_im, A00_re + th.vfmacc.vf AB12_re, B02_re, A10_re + th.vfnmsac.vf AB12_re, B02_im, A10_im + th.vfmacc.vf AB12_im, B02_re, A10_im + th.vfmacc.vf AB12_im, B02_im, A10_re + + th.vfmacc.vf AB02_re, B12_re, A01_re // AB(:,2) += A(:,l+3) * B(l+3,2) + th.vfnmsac.vf AB02_re, B12_im, A01_im + th.vfmacc.vf AB02_im, B12_re, A01_im + th.vfmacc.vf AB02_im, B12_im, A01_re + th.vfmacc.vf AB12_re, B12_re, A11_re + th.vfnmsac.vf AB12_re, B12_im, A11_im + th.vfmacc.vf AB12_im, B12_re, A11_im + th.vfmacc.vf AB12_im, B12_im, A11_re + + th.vfmacc.vf AB03_re, B03_re, A00_re // AB(:,3) += A(:,l+2) * B(l+2,3) + th.vfnmsac.vf AB03_re, B03_im, A00_im + th.vfmacc.vf AB03_im, B03_re, A00_im + th.vfmacc.vf AB03_im, B03_im, A00_re + th.vfmacc.vf AB13_re, B03_re, A10_re + th.vfnmsac.vf AB13_re, B03_im, A10_im + th.vfmacc.vf AB13_im, B03_re, A10_im + th.vfmacc.vf AB13_im, B03_im, A10_re + + th.vfmacc.vf AB03_re, B13_re, A01_re // AB(:,3) += A(:,l+3) * B(l+3,3) + th.vfnmsac.vf AB03_re, B13_im, A01_im + th.vfmacc.vf AB03_im, B13_re, A01_im + th.vfmacc.vf AB03_im, B13_im, A01_re + th.vfmacc.vf AB13_re, B13_re, A11_re + th.vfnmsac.vf AB13_re, B13_im, A11_im + th.vfmacc.vf AB13_im, B13_re, A11_im + th.vfmacc.vf AB13_im, B13_im, A11_re + + li tmp, 3 + ble loop_counter, tmp, TAIL_UNROLL_2 + + // Load A and B for the next iteration + VLE A00_re, (A00_ptr) + VLE A10_re, (A10_ptr) + VLE A01_re, (A01_ptr) + VLE A11_re, (A11_ptr) + + FLOAD B00_re, 0*REALSIZE(B_row_ptr) + FLOAD B00_im, 1*REALSIZE(B_row_ptr) + FLOAD B01_re, 2*REALSIZE(B_row_ptr) + FLOAD 
B01_im, 3*REALSIZE(B_row_ptr) + FLOAD B02_re, 4*REALSIZE(B_row_ptr) + FLOAD B02_im, 5*REALSIZE(B_row_ptr) + FLOAD B03_re, 6*REALSIZE(B_row_ptr) + FLOAD B03_im, 7*REALSIZE(B_row_ptr) + + j LOOP_UNROLL_4 + +TAIL_UNROLL_2: // loop_counter <= 3 + li tmp, 1 + ble loop_counter, tmp, TAIL_UNROLL_1 + + addi loop_counter, loop_counter, -2 + + // Load and deinterleave A(:,l) + VLE A00_re, (A00_ptr) + VLE A10_re, (A10_ptr) + + // Load B(l, 0:3) + FLOAD B00_re, 0*REALSIZE(B_row_ptr) + FLOAD B00_im, 1*REALSIZE(B_row_ptr) + FLOAD B01_re, 2*REALSIZE(B_row_ptr) + FLOAD B01_im, 3*REALSIZE(B_row_ptr) + FLOAD B02_re, 4*REALSIZE(B_row_ptr) + FLOAD B02_im, 5*REALSIZE(B_row_ptr) + FLOAD B03_re, 6*REALSIZE(B_row_ptr) + FLOAD B03_im, 7*REALSIZE(B_row_ptr) + + th.vfmacc.vf AB00_re, B00_re, A00_re // AB(:,0) += A(:,l) * B(l,0) + th.vfnmsac.vf AB00_re, B00_im, A00_im + th.vfmacc.vf AB00_im, B00_re, A00_im + th.vfmacc.vf AB00_im, B00_im, A00_re + th.vfmacc.vf AB10_re, B00_re, A10_re + th.vfnmsac.vf AB10_re, B00_im, A10_im + th.vfmacc.vf AB10_im, B00_re, A10_im + th.vfmacc.vf AB10_im, B00_im, A10_re + + th.vfmacc.vf AB01_re, B01_re, A00_re // AB(:,1) += A(:,l) * B(l,1) + th.vfnmsac.vf AB01_re, B01_im, A00_im + th.vfmacc.vf AB01_im, B01_re, A00_im + th.vfmacc.vf AB01_im, B01_im, A00_re + th.vfmacc.vf AB11_re, B01_re, A10_re + th.vfnmsac.vf AB11_re, B01_im, A10_im + th.vfmacc.vf AB11_im, B01_re, A10_im + th.vfmacc.vf AB11_im, B01_im, A10_re + + // Load and deinterleave A(:,l+1) + VLE A01_re, (A01_ptr) + VLE A11_re, (A11_ptr) + + // Load B(l+1, 0:3) + FLOAD B10_re, 8*REALSIZE(B_row_ptr) + FLOAD B10_im, 9*REALSIZE(B_row_ptr) + FLOAD B11_re, 10*REALSIZE(B_row_ptr) + FLOAD B11_im, 11*REALSIZE(B_row_ptr) + FLOAD B12_re, 12*REALSIZE(B_row_ptr) + FLOAD B12_im, 13*REALSIZE(B_row_ptr) + FLOAD B13_re, 14*REALSIZE(B_row_ptr) + FLOAD B13_im, 15*REALSIZE(B_row_ptr) + + th.vfmacc.vf AB00_re, B10_re, A01_re // AB(:,0) += A(:,l+1) * B(l+1,0) + th.vfnmsac.vf AB00_re, B10_im, A01_im + th.vfmacc.vf AB00_im, 
B10_re, A01_im + th.vfmacc.vf AB00_im, B10_im, A01_re + th.vfmacc.vf AB10_re, B10_re, A11_re + th.vfnmsac.vf AB10_re, B10_im, A11_im + th.vfmacc.vf AB10_im, B10_re, A11_im + th.vfmacc.vf AB10_im, B10_im, A11_re + + th.vfmacc.vf AB01_re, B11_re, A01_re // AB(:,1) += A(:,l+1) * B(l+1,1) + th.vfnmsac.vf AB01_re, B11_im, A01_im + th.vfmacc.vf AB01_im, B11_re, A01_im + th.vfmacc.vf AB01_im, B11_im, A01_re + th.vfmacc.vf AB11_re, B11_re, A11_re + th.vfnmsac.vf AB11_re, B11_im, A11_im + th.vfmacc.vf AB11_im, B11_re, A11_im + th.vfmacc.vf AB11_im, B11_im, A11_re + + th.vfmacc.vf AB02_re, B02_re, A00_re // AB(:,2) += A(:,l) * B(l,2) + th.vfnmsac.vf AB02_re, B02_im, A00_im + th.vfmacc.vf AB02_im, B02_re, A00_im + th.vfmacc.vf AB02_im, B02_im, A00_re + th.vfmacc.vf AB12_re, B02_re, A10_re + th.vfnmsac.vf AB12_re, B02_im, A10_im + th.vfmacc.vf AB12_im, B02_re, A10_im + th.vfmacc.vf AB12_im, B02_im, A10_re + + th.vfmacc.vf AB03_re, B03_re, A00_re // AB(:,3) += A(:,l) * B(l,3) + th.vfnmsac.vf AB03_re, B03_im, A00_im + th.vfmacc.vf AB03_im, B03_re, A00_im + th.vfmacc.vf AB03_im, B03_im, A00_re + th.vfmacc.vf AB13_re, B03_re, A10_re + th.vfnmsac.vf AB13_re, B03_im, A10_im + th.vfmacc.vf AB13_im, B03_re, A10_im + th.vfmacc.vf AB13_im, B03_im, A10_re + + th.vfmacc.vf AB02_re, B12_re, A01_re // AB(:,2) += A(:,l+1) * B(l+1,2) + th.vfnmsac.vf AB02_re, B12_im, A01_im + th.vfmacc.vf AB02_im, B12_re, A01_im + th.vfmacc.vf AB02_im, B12_im, A01_re + th.vfmacc.vf AB12_re, B12_re, A11_re + th.vfnmsac.vf AB12_re, B12_im, A11_im + th.vfmacc.vf AB12_im, B12_re, A11_im + th.vfmacc.vf AB12_im, B12_im, A11_re + + th.vfmacc.vf AB03_re, B13_re, A01_re // AB(:,3) += A(:,l+1) * B(l+1,3) + th.vfnmsac.vf AB03_re, B13_im, A01_im + th.vfmacc.vf AB03_im, B13_re, A01_im + th.vfmacc.vf AB03_im, B13_im, A01_re + th.vfmacc.vf AB13_re, B13_re, A11_re + th.vfnmsac.vf AB13_re, B13_im, A11_im + th.vfmacc.vf AB13_im, B13_re, A11_im + th.vfmacc.vf AB13_im, B13_im, A11_re + + beqz loop_counter, MULTIPLYALPHA + + // 
Advance pointers + add A00_ptr, A01_ptr, s0 + add A10_ptr, A11_ptr, s0 + addi B_row_ptr, B_row_ptr, 16*REALSIZE + +TAIL_UNROLL_1: // loop_counter <= 1 + beqz loop_counter, MULTIPLYALPHA + + // Load and deinterleave A(:,l) + VLE A00_re, (A00_ptr) + VLE A10_re, (A10_ptr) + + // Load B(l,0:3) + FLOAD B00_re, 0*REALSIZE(B_row_ptr) + FLOAD B00_im, 1*REALSIZE(B_row_ptr) + FLOAD B01_re, 2*REALSIZE(B_row_ptr) + FLOAD B01_im, 3*REALSIZE(B_row_ptr) + FLOAD B02_re, 4*REALSIZE(B_row_ptr) + FLOAD B02_im, 5*REALSIZE(B_row_ptr) + FLOAD B03_re, 6*REALSIZE(B_row_ptr) + FLOAD B03_im, 7*REALSIZE(B_row_ptr) + + th.vfmacc.vf AB00_re, B00_re, A00_re // AB(:,0) += A(:,l) * B(l,0) + th.vfnmsac.vf AB00_re, B00_im, A00_im + th.vfmacc.vf AB00_im, B00_re, A00_im + th.vfmacc.vf AB00_im, B00_im, A00_re + th.vfmacc.vf AB10_re, B00_re, A10_re + th.vfnmsac.vf AB10_re, B00_im, A10_im + th.vfmacc.vf AB10_im, B00_re, A10_im + th.vfmacc.vf AB10_im, B00_im, A10_re + + th.vfmacc.vf AB01_re, B01_re, A00_re // AB(:,1) += A(:,l) * B(l,1) + th.vfnmsac.vf AB01_re, B01_im, A00_im + th.vfmacc.vf AB01_im, B01_re, A00_im + th.vfmacc.vf AB01_im, B01_im, A00_re + th.vfmacc.vf AB11_re, B01_re, A10_re + th.vfnmsac.vf AB11_re, B01_im, A10_im + th.vfmacc.vf AB11_im, B01_re, A10_im + th.vfmacc.vf AB11_im, B01_im, A10_re + + th.vfmacc.vf AB02_re, B02_re, A00_re // AB(:,2) += A(:,l) * B(l,2) + th.vfnmsac.vf AB02_re, B02_im, A00_im + th.vfmacc.vf AB02_im, B02_re, A00_im + th.vfmacc.vf AB02_im, B02_im, A00_re + th.vfmacc.vf AB12_re, B02_re, A10_re + th.vfnmsac.vf AB12_re, B02_im, A10_im + th.vfmacc.vf AB12_im, B02_re, A10_im + th.vfmacc.vf AB12_im, B02_im, A10_re + + th.vfmacc.vf AB03_re, B03_re, A00_re // AB(:,3) += A(:,l) * B(l,3) + th.vfnmsac.vf AB03_re, B03_im, A00_im + th.vfmacc.vf AB03_im, B03_re, A00_im + th.vfmacc.vf AB03_im, B03_im, A00_re + th.vfmacc.vf AB13_re, B03_re, A10_re + th.vfnmsac.vf AB13_re, B03_im, A10_im + th.vfmacc.vf AB13_im, B03_re, A10_im + th.vfmacc.vf AB13_im, B03_im, A10_re + +MULTIPLYALPHA: + 
FLOAD ALPHA_re, 0*REALSIZE(a1) + FLOAD ALPHA_im, 1*REALSIZE(a1) + + FEQ tmp, ALPHA_im, fzero + bne tmp, zero, ALPHAREAL + + // [AB00, ..., AB03] * alpha + th.vfmul.vf tmp0_re, AB00_im, ALPHA_im + th.vfmul.vf tmp0_im, AB00_re, ALPHA_im + th.vfmul.vf tmp1_re, AB01_im, ALPHA_im + th.vfmul.vf tmp1_im, AB01_re, ALPHA_im + th.vfmul.vf tmp2_re, AB02_im, ALPHA_im + th.vfmul.vf tmp2_im, AB02_re, ALPHA_im + th.vfmul.vf tmp3_re, AB03_im, ALPHA_im + th.vfmul.vf tmp3_im, AB03_re, ALPHA_im + th.vfmsub.vf AB00_re, ALPHA_re, tmp0_re + th.vfmsub.vf AB01_re, ALPHA_re, tmp1_re + th.vfmsub.vf AB02_re, ALPHA_re, tmp2_re + th.vfmsub.vf AB03_re, ALPHA_re, tmp3_re + th.vfmadd.vf AB00_im, ALPHA_re, tmp0_im + th.vfmadd.vf AB01_im, ALPHA_re, tmp1_im + th.vfmadd.vf AB02_im, ALPHA_re, tmp2_im + th.vfmadd.vf AB03_im, ALPHA_re, tmp3_im + + // [AB10, ..., AB13] * alpha + th.vfmul.vf tmp0_re, AB10_im, ALPHA_im + th.vfmul.vf tmp0_im, AB10_re, ALPHA_im + th.vfmul.vf tmp1_re, AB11_im, ALPHA_im + th.vfmul.vf tmp1_im, AB11_re, ALPHA_im + th.vfmul.vf tmp2_re, AB12_im, ALPHA_im + th.vfmul.vf tmp2_im, AB12_re, ALPHA_im + th.vfmul.vf tmp3_re, AB13_im, ALPHA_im + th.vfmul.vf tmp3_im, AB13_re, ALPHA_im + th.vfmsub.vf AB10_re, ALPHA_re, tmp0_re + th.vfmsub.vf AB11_re, ALPHA_re, tmp1_re + th.vfmsub.vf AB12_re, ALPHA_re, tmp2_re + th.vfmsub.vf AB13_re, ALPHA_re, tmp3_re + th.vfmadd.vf AB10_im, ALPHA_re, tmp0_im + th.vfmadd.vf AB11_im, ALPHA_re, tmp1_im + th.vfmadd.vf AB12_im, ALPHA_re, tmp2_im + th.vfmadd.vf AB13_im, ALPHA_re, tmp3_im + + j MULTIPLYBETA + +ALPHAREAL: + th.vfmul.vf AB00_re, AB00_re, ALPHA_re + th.vfmul.vf AB00_im, AB00_im, ALPHA_re + th.vfmul.vf AB01_re, AB01_re, ALPHA_re + th.vfmul.vf AB01_im, AB01_im, ALPHA_re + th.vfmul.vf AB02_re, AB02_re, ALPHA_re + th.vfmul.vf AB02_im, AB02_im, ALPHA_re + th.vfmul.vf AB03_re, AB03_re, ALPHA_re + th.vfmul.vf AB03_im, AB03_im, ALPHA_re + + th.vfmul.vf AB10_re, AB10_re, ALPHA_re + th.vfmul.vf AB10_im, AB10_im, ALPHA_re + th.vfmul.vf AB11_re, AB11_re, ALPHA_re 
+ th.vfmul.vf AB11_im, AB11_im, ALPHA_re + th.vfmul.vf AB12_re, AB12_re, ALPHA_re + th.vfmul.vf AB12_im, AB12_im, ALPHA_re + th.vfmul.vf AB13_re, AB13_re, ALPHA_re + th.vfmul.vf AB13_im, AB13_im, ALPHA_re + +MULTIPLYBETA: + FLOAD BETA_re, 0*REALSIZE(a4) + FLOAD BETA_im, 1*REALSIZE(a4) + FEQ tmp, BETA_im, fzero + bne tmp, zero, BETAREAL + + // Load and deinterleave C(0:VLEN-1, 0:1) + VLE C0_re, (C00_ptr) + VLE C1_re, (C01_ptr) + + // Load and deinterleave C(0:VLEN-1, 2:3) + VLE C2_re, (C02_ptr) + VLE C3_re, (C03_ptr) + + // C(0:VLEN-1,0:1) * beta + AB(0:VLEN-1,0:1) + th.vfmacc.vf AB00_re, BETA_re, C0_re + th.vfnmsac.vf AB00_re, BETA_im, C0_im + th.vfmacc.vf AB00_im, BETA_re, C0_im + th.vfmacc.vf AB00_im, BETA_im, C0_re + VSE AB00_re, (C00_ptr) + + th.vfmacc.vf AB01_re, BETA_re, C1_re + th.vfnmsac.vf AB01_re, BETA_im, C1_im + th.vfmacc.vf AB01_im, BETA_re, C1_im + th.vfmacc.vf AB01_im, BETA_im, C1_re + VSE AB01_re, (C01_ptr) + + // C(0:VLEN-1,2:3) * beta + AB(0:VLEN-1,2:3) + th.vfmacc.vf AB02_re, BETA_re, C2_re + th.vfnmsac.vf AB02_re, BETA_im, C2_im + th.vfmacc.vf AB02_im, BETA_re, C2_im + th.vfmacc.vf AB02_im, BETA_im, C2_re + VSE AB02_re, (C02_ptr) + + th.vfmacc.vf AB03_re, BETA_re, C3_re + th.vfnmsac.vf AB03_re, BETA_im, C3_im + th.vfmacc.vf AB03_im, BETA_re, C3_im + th.vfmacc.vf AB03_im, BETA_im, C3_re + VSE AB03_re, (C03_ptr) + + // Load and deinterleave C(VLEN:2*VLEN-1, 0:1) + VLE C0_re, (C10_ptr) + VLE C1_re, (C11_ptr) + + // Load and deinterleave C(VLEN:2*VLEN-1, 2:3) + VLE C2_re, (C12_ptr) + VLE C3_re, (C13_ptr) + + // C(VLEN:2*VLEN-1,0:1) * beta + AB(VLEN:2*VLEN-1,0:1) + th.vfmacc.vf AB10_re, BETA_re, C0_re + th.vfnmsac.vf AB10_re, BETA_im, C0_im + th.vfmacc.vf AB10_im, BETA_re, C0_im + th.vfmacc.vf AB10_im, BETA_im, C0_re + VSE AB10_re, (C10_ptr) + + th.vfmacc.vf AB11_re, BETA_re, C1_re + th.vfnmsac.vf AB11_re, BETA_im, C1_im + th.vfmacc.vf AB11_im, BETA_re, C1_im + th.vfmacc.vf AB11_im, BETA_im, C1_re + VSE AB11_re, (C11_ptr) + + // C(VLEN:2*VLEN-1,2:3) 
* beta + AB(VLEN:2*VLEN-1,2:3) + th.vfmacc.vf AB12_re, BETA_re, C2_re + th.vfnmsac.vf AB12_re, BETA_im, C2_im + th.vfmacc.vf AB12_im, BETA_re, C2_im + th.vfmacc.vf AB12_im, BETA_im, C2_re + VSE AB12_re, (C12_ptr) + + th.vfmacc.vf AB13_re, BETA_re, C3_re + th.vfnmsac.vf AB13_re, BETA_im, C3_im + th.vfmacc.vf AB13_im, BETA_re, C3_im + th.vfmacc.vf AB13_im, BETA_im, C3_re + VSE AB13_re, (C13_ptr) + + j END + +BETAREAL: + FEQ tmp, BETA_re, fzero + bne tmp, zero, BETAZERO + + // Load and deinterleave C(0:VLEN-1, 0:3) + VLE C0_re, (C00_ptr) + VLE C1_re, (C01_ptr) + VLE C2_re, (C02_ptr) + VLE C3_re, (C03_ptr) + + // C(0:VLEN-1,0:3) * beta + AB(0:VLEN-1,0:3) + th.vfmacc.vf AB00_re, BETA_re, C0_re + th.vfmacc.vf AB00_im, BETA_re, C0_im + th.vfmacc.vf AB01_re, BETA_re, C1_re + th.vfmacc.vf AB01_im, BETA_re, C1_im + + th.vfmacc.vf AB02_re, BETA_re, C2_re + th.vfmacc.vf AB02_im, BETA_re, C2_im + th.vfmacc.vf AB03_re, BETA_re, C3_re + th.vfmacc.vf AB03_im, BETA_re, C3_im + + VSE AB00_re, (C00_ptr) + VSE AB01_re, (C01_ptr) + VSE AB02_re, (C02_ptr) + VSE AB03_re, (C03_ptr) + + // Load and deinterleave C(VLEN:2*VLEN-1, 0:3) + VLE C0_re, (C10_ptr) + VLE C1_re, (C11_ptr) + VLE C2_re, (C12_ptr) + VLE C3_re, (C13_ptr) + + // C(VLEN:2*VLEN-1,0:3) * beta + AB(VLEN:2*VLEN-1,0:3) + th.vfmacc.vf AB10_re, BETA_re, C0_re + th.vfmacc.vf AB10_im, BETA_re, C0_im + th.vfmacc.vf AB11_re, BETA_re, C1_re + th.vfmacc.vf AB11_im, BETA_re, C1_im + + th.vfmacc.vf AB12_re, BETA_re, C2_re + th.vfmacc.vf AB12_im, BETA_re, C2_im + th.vfmacc.vf AB13_re, BETA_re, C3_re + th.vfmacc.vf AB13_im, BETA_re, C3_im + + VSE AB10_re, (C10_ptr) + VSE AB11_re, (C11_ptr) + VSE AB12_re, (C12_ptr) + VSE AB13_re, (C13_ptr) + + j END + +BETAZERO: + VSE AB00_re, (C00_ptr) + VSE AB01_re, (C01_ptr) + VSE AB02_re, (C02_ptr) + VSE AB03_re, (C03_ptr) + + VSE AB10_re, (C10_ptr) + VSE AB11_re, (C11_ptr) + VSE AB12_re, (C12_ptr) + VSE AB13_re, (C13_ptr) + +END: + #include "rvv_sg2042_restore_registers.h" + ret diff --git 
a/kernels/rvv_sg2042/3/bli_dgemm_rvv_sg2042_4vx4.c b/kernels/rvv_sg2042/3/bli_dgemm_rvv_sg2042_4vx4.c new file mode 100644 index 000000000..04b241377 --- /dev/null +++ b/kernels/rvv_sg2042/3/bli_dgemm_rvv_sg2042_4vx4.c @@ -0,0 +1,79 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2023, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + +*/ +#include "bli_rvv_sg2042_utils.h" + +void bli_dgemm_rvv_sg2042_asm_4vx4 + ( + intptr_t k, + const void* alpha, + const void* a, + const void* b, + const void* beta, + void* c, intptr_t rs_c, intptr_t cs_c + ); + +void bli_dgemm_rvv_sg2042_4vx4 + ( + dim_t m, + dim_t n, + dim_t k, + const void* alpha, + const void* a, + const void* b, + const void* beta, + void* c, inc_t rs_c, inc_t cs_c, + const auxinfo_t* data, + const cntx_t* cntx + ) +{ + // The assembly kernels always take native machine-sized integer arguments. + // dim_t and inc_t are normally machine-sized; the static assertion below checks that neither is wider than intptr_t. + bli_static_assert( sizeof(dim_t) <= sizeof(intptr_t) && + sizeof(inc_t) <= sizeof(intptr_t) ); + + // mr (vector-length dependent) is read from the context; nr is fixed at 4 for this kernel. + const inc_t mr = bli_cntx_get_blksz_def_dt( BLIS_DOUBLE, BLIS_MR, cntx ); + const inc_t nr = 4; + + GEMM_UKR_SETUP_CT( d, mr, nr, false ); + + // The assembly kernel assumes unit row stride in C (rs_c == 1), and the context should not deviate from it. + assert( rs_c == 1 ); + + bli_dgemm_rvv_sg2042_asm_4vx4( k, alpha, a, b, beta, c, + get_vlenb(), cs_c * sizeof(double) ); // the rs_c slot carries vlenb; cs_c is passed in bytes + + GEMM_UKR_FLUSH_CT( d ); +} diff --git a/kernels/rvv_sg2042/3/bli_dgemm_rvv_sg2042_asm_4vx4.S b/kernels/rvv_sg2042/3/bli_dgemm_rvv_sg2042_asm_4vx4.S new file mode 100644 index 000000000..5ada1f871 --- /dev/null +++ b/kernels/rvv_sg2042/3/bli_dgemm_rvv_sg2042_asm_4vx4.S @@ -0,0 +1,45 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2023, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +*/ + +#define REALNAME bli_dgemm_rvv_sg2042_asm_4vx4 +#define DATASIZE 8 +#define VTYPE e64 +#define FLOAD fld +#define FZERO(fr) fcvt.d.w fr, x0 +#define FEQ feq.d +#define VLE th.vle.v +#define VSE th.vse.v + +#include "bli_sdgemm_rvv_sg2042_asm_4vx4.h" diff --git a/kernels/rvv_sg2042/3/bli_rvv_sg2042_utils.h b/kernels/rvv_sg2042/3/bli_rvv_sg2042_utils.h new file mode 100644 index 000000000..e4570321d --- /dev/null +++ b/kernels/rvv_sg2042/3/bli_rvv_sg2042_utils.h @@ -0,0 +1,46 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. 
+ + Copyright (C) 2023, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +#include "blis.h" +#include <assert.h> + +static inline uintptr_t get_vlenb(void) +{ + uintptr_t vlenb = 0; + __asm__ volatile ( + " csrr %0, vlenb" // read the vlenb CSR: vector register length in bytes + : "=r" (vlenb) + ); + return vlenb; +} diff --git a/kernels/rvv_sg2042/3/bli_sdgemm_rvv_sg2042_asm_4vx4.h b/kernels/rvv_sg2042/3/bli_sdgemm_rvv_sg2042_asm_4vx4.h new file mode 100644 index 000000000..2fe68d1d6 --- /dev/null +++ b/kernels/rvv_sg2042/3/bli_sdgemm_rvv_sg2042_asm_4vx4.h @@ -0,0 +1,370 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2023, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +*/ + +.text +.align 2 +.global REALNAME + +// void REALNAME(intptr_t k, void* alpha, void* a, void* b, +// void* beta, void* c, intptr_t rs_c, intptr_t cs_c) +// +// register arguments: +// a0 k +// a1 alpha +// a2 a +// a3 b +// a4 beta +// a5 c +// a6 rs_c +// a7 cs_c +// + +// C11 := beta * C11 + alpha * A1 * B1 +/* +In the case of double precision gemm with 128 bit vector registers +this kernel computes an 8 x 4 microtile result matrix c11 using two micropanels a1 and b1, +with dimensions 8 x k and k x 4 respectively. +For every rank-1 update (column-row product) we produce and accumulate an intermediate result matrix +ab with dimensions 8 x 4 (the loop is unrolled by a factor of 4). +After k iterations ab is scaled by alpha and combined with beta * c11 to form the final result. +The main loop consumes 4 columns of a1 and rows of b1 per iteration; +when <= 3 columns and rows remain we branch into the tail-handling cases. + + + c11: a1: b1: + _______ ________________________ _______ + | | |0 8 10 18 | |0 1 2 3| + | | |1 9 11 19 | |4 5 6 7| + | | += |2 A 12 1A | |8 9 A B| +MR = 8 | | |3 B 13 1B . . . | |C D E F| + | | |4 C 14 1C | | | + | | |5 D 15 1D | | | + | | |6 E 16 1E | | | k + |_______| |7_F_17_1F_______________| | | + | . | + NR = 4 k | . 
| + | | + ab: | | + _________ |_______| + |1 9 11 19| + |2 A 12 1A| + |3 B 13 1B| + MR = 8 |4 C 14 1C| + |5 D 15 1D| + |6 E 16 1E| + |7_F_17_1F| + + NR = 4 +*/ + +// we save the k (number of columns of a1 and rows of b1) in "loop_counter" +#define loop_counter a0 + +// we hold pointers to two A columns at any time +#define AX0_ptr a2 +#define AX1_ptr s5 + +// we hold a pointer to a B row at any time +#define B_row_ptr a3 + +// we hold pointers to C columns +#define CX0_ptr a5 +#define CX1_ptr t3 +#define CX2_ptr t4 +#define CX3_ptr t5 + +#define tmp t6 + +#define ALPHA fa1 +#define BETA fa2 + +// we hold two rows of B at any time in scalar registers fa4...fa7 and fa0...fa3 +#define B00 fa4 +#define B01 fa5 +#define B02 fa6 +#define B03 fa7 + +#define B10 fa0 +#define B11 fa1 +#define B12 fa2 +#define B13 fa3 + +#define fzero ft8 + +// we hold two columns of A at any time in the LMUL = 4 vector register groups v24 and v28 +#define AX0 v24 +#define AX1 v28 + +// we hold the four columns of C in the register groups v16, v20, v24, v28 (CX2/CX3 reuse the A groups after the k loop) +#define CX0 v16 +#define CX1 v20 +#define CX2 v24 +#define CX3 v28 + +// we hold the four columns of AB in the register groups v0, v4, v8, v12; each column has its own accumulator +#define ABX0 v0 +#define ABX1 v4 +#define ABX2 v8 +#define ABX3 v12 + +// column stride +#define cs_c a7 + +REALNAME: +#include "rvv_sg2042_save_registers.h" + +// we set LMUL = 4 to fully exploit register grouping +th.vsetvli s0, zero, VTYPE, m4 +csrr s0, vlenb +FZERO(fzero) + +// Set up pointers +add CX1_ptr, CX0_ptr, cs_c +add CX2_ptr, CX1_ptr, cs_c +add CX3_ptr, CX2_ptr, cs_c + +// Zero-initialize accumulators +th.vxor.vv ABX0, ABX0, ABX0 +th.vxor.vv ABX1, ABX1, ABX1 +th.vxor.vv ABX2, ABX2, ABX2 +th.vxor.vv ABX3, ABX3, ABX3 + +// Handle k == 0 +beqz loop_counter, MULTIPLYBETA + +slli s0, s0, 2 // length of a column of A in bytes + +li tmp, 3 +ble loop_counter, tmp, TAIL_UNROLL_2 + +// Preload A and B +// Load A(:,l) +VLE AX0, (AX0_ptr) + +// Load B(l,0:3) +FLOAD B00, 0*DATASIZE(B_row_ptr) +FLOAD B01, 1*DATASIZE(B_row_ptr) +FLOAD B02, 
2*DATASIZE(B_row_ptr) +FLOAD B03, 3*DATASIZE(B_row_ptr) + +// Set up pointers to A(:,l+1) +add AX1_ptr, AX0_ptr, s0 + +LOOP_UNROLL_4: +addi loop_counter, loop_counter, -4 + +// compute and accumulate AB with first column of A and first row of B +th.vfmacc.vf ABX0, B00, AX0 // AB(X,0) += A(X,0) * B(0,0) +th.vfmacc.vf ABX1, B01, AX0 // AB(X,1) += A(X,0) * B(0,1) +th.vfmacc.vf ABX2, B02, AX0 // AB(X,2) += A(X,0) * B(0,2) +th.vfmacc.vf ABX3, B03, AX0 // AB(X,3) += A(X,0) * B(0,3) + +// Load B(l+1,0:3) +FLOAD B10, 4*DATASIZE(B_row_ptr) +FLOAD B11, 5*DATASIZE(B_row_ptr) +FLOAD B12, 6*DATASIZE(B_row_ptr) +FLOAD B13, 7*DATASIZE(B_row_ptr) +addi B_row_ptr, B_row_ptr, 8*DATASIZE + +// Load A(:,l+1) +VLE AX1, (AX1_ptr) + +// Point to A(:,l+2) +add AX0_ptr, AX1_ptr, s0 + +// compute and accumulate AB with second column of A and second row of B +th.vfmacc.vf ABX0, B10, AX1 // AB(X,0) += A(X,1) * B(1,0) +th.vfmacc.vf ABX1, B11, AX1 // AB(X,1) += A(X,1) * B(1,1) +th.vfmacc.vf ABX2, B12, AX1 // AB(X,2) += A(X,1) * B(1,2) +th.vfmacc.vf ABX3, B13, AX1 // AB(X,3) += A(X,1) * B(1,3) + +// Load B(l+2,0:3) +FLOAD B00, 0*DATASIZE(B_row_ptr) +FLOAD B01, 1*DATASIZE(B_row_ptr) +FLOAD B02, 2*DATASIZE(B_row_ptr) +FLOAD B03, 3*DATASIZE(B_row_ptr) + +// Load A(:,l+2) +VLE AX0, (AX0_ptr) + +// Point to A(:,l+3) +add AX1_ptr, AX0_ptr, s0 + +// Load A(:,l+3) +VLE AX1, (AX1_ptr) + +// Point to A(:,l+4) +add AX0_ptr, AX1_ptr, s0 + +// compute and accumulate AB with third column of A and third row of B +th.vfmacc.vf ABX0, B00, AX0 // AB(X,0) += A(X,2) * B(2,0) +th.vfmacc.vf ABX1, B01, AX0 // AB(X,1) += A(X,2) * B(2,1) +th.vfmacc.vf ABX2, B02, AX0 // AB(X,2) += A(X,2) * B(2,2) +th.vfmacc.vf ABX3, B03, AX0 // AB(X,3) += A(X,2) * B(2,3) + +// Load B(l+3,0:3) +FLOAD B10, 4*DATASIZE(B_row_ptr) +FLOAD B11, 5*DATASIZE(B_row_ptr) +FLOAD B12, 6*DATASIZE(B_row_ptr) +FLOAD B13, 7*DATASIZE(B_row_ptr) +addi B_row_ptr, B_row_ptr, 8*DATASIZE + +// compute and accumulate AB with fourth column of A and fourth row of B +th.vfmacc.vf 
ABX0, B10, AX1 // AB(X,0) += A(X,3) * B(3,0) +th.vfmacc.vf ABX1, B11, AX1 // AB(X,1) += A(X,3) * B(3,1) +th.vfmacc.vf ABX2, B12, AX1 // AB(X,2) += A(X,3) * B(3,2) +th.vfmacc.vf ABX3, B13, AX1 // AB(X,3) += A(X,3) * B(3,3) + +li tmp, 3 +ble loop_counter, tmp, TAIL_UNROLL_2 + +// Load A and B for the next iteration +// Load B(l,0:3) +FLOAD B00, 0*DATASIZE(B_row_ptr) +FLOAD B01, 1*DATASIZE(B_row_ptr) +FLOAD B02, 2*DATASIZE(B_row_ptr) +FLOAD B03, 3*DATASIZE(B_row_ptr) + +// Load A(:,l) +VLE AX0, (AX0_ptr) + +// Set up pointers to A(:,l+1) +add AX1_ptr, AX0_ptr, s0 + +j LOOP_UNROLL_4 + +TAIL_UNROLL_2: // loop_counter <= 3 +li tmp, 1 +ble loop_counter, tmp, TAIL_UNROLL_1 + +addi loop_counter, loop_counter, -2 + +// Load B(l,0:3) +FLOAD B00, 0*DATASIZE(B_row_ptr) +FLOAD B01, 1*DATASIZE(B_row_ptr) +FLOAD B02, 2*DATASIZE(B_row_ptr) +FLOAD B03, 3*DATASIZE(B_row_ptr) + +// Load A(:,l) +VLE AX0, (AX0_ptr) + +// Point to A(:,l+1) +add AX1_ptr, AX0_ptr, s0 + +th.vfmacc.vf ABX0, B00, AX0 // AB(X,0) += A(X,0) * B(0,0) +th.vfmacc.vf ABX1, B01, AX0 // AB(X,1) += A(X,0) * B(0,1) +th.vfmacc.vf ABX2, B02, AX0 // AB(X,2) += A(X,0) * B(0,2) +th.vfmacc.vf ABX3, B03, AX0 // AB(X,3) += A(X,0) * B(0,3) + +// Load B(l+1,0:3) +FLOAD B10, 4*DATASIZE(B_row_ptr) +FLOAD B11, 5*DATASIZE(B_row_ptr) +FLOAD B12, 6*DATASIZE(B_row_ptr) +FLOAD B13, 7*DATASIZE(B_row_ptr) +addi B_row_ptr, B_row_ptr, 8*DATASIZE + +// Load A(:,l+1) +VLE AX1, (AX1_ptr) + +// Point to A(:,l+2) +add AX0_ptr, AX1_ptr, s0 + +th.vfmacc.vf ABX0, B10, AX1 // AB(X,0) += A(X,1) * B(1,0) +th.vfmacc.vf ABX1, B11, AX1 // AB(X,1) += A(X,1) * B(1,1) +th.vfmacc.vf ABX2, B12, AX1 // AB(X,2) += A(X,1) * B(1,2) +th.vfmacc.vf ABX3, B13, AX1 // AB(X,3) += A(X,1) * B(1,3) + +li tmp, 1 +ble loop_counter, tmp, TAIL_UNROLL_1 + +TAIL_UNROLL_1: // loop_counter <= 1 +beqz loop_counter, MULTIPLYALPHA + +// Load row of B +FLOAD B00, 0*DATASIZE(B_row_ptr) +FLOAD B01, 1*DATASIZE(B_row_ptr) +FLOAD B02, 
3*DATASIZE(B_row_ptr) + +// Load A(:,l) +VLE AX0, (AX0_ptr) + +th.vfmacc.vf ABX0, B00, AX0 // AB(X,0) += A(X,0) * B(0,0) +th.vfmacc.vf ABX1, B01, AX0 // AB(X,1) += A(X,0) * B(0,1) +th.vfmacc.vf ABX2, B02, AX0 // AB(X,2) += A(X,0) * B(0,2) +th.vfmacc.vf ABX3, B03, AX0 // AB(X,3) += A(X,0) * B(0,3) + +MULTIPLYALPHA: +FLOAD ALPHA, (a1) + +// Multiply with alpha (skipped for k == 0, where AB is all zero) +th.vfmul.vf ABX0, ABX0, ALPHA +th.vfmul.vf ABX1, ABX1, ALPHA +th.vfmul.vf ABX2, ABX2, ALPHA +th.vfmul.vf ABX3, ABX3, ALPHA + +MULTIPLYBETA: +FLOAD BETA, (a4) +FEQ tmp, BETA, fzero +beq tmp, zero, BETANOTZERO + +BETAZERO: + +VSE ABX0, (CX0_ptr) +VSE ABX1, (CX1_ptr) +VSE ABX2, (CX2_ptr) +VSE ABX3, (CX3_ptr) + +j END + +BETANOTZERO: +VLE CX0, (CX0_ptr) // Load C(0:VLEN-1, 0:3) +VLE CX1, (CX1_ptr) +VLE CX2, (CX2_ptr) +VLE CX3, (CX3_ptr) + +th.vfmacc.vf ABX0, BETA, CX0 +th.vfmacc.vf ABX1, BETA, CX1 +th.vfmacc.vf ABX2, BETA, CX2 +th.vfmacc.vf ABX3, BETA, CX3 + +VSE ABX0, (CX0_ptr) // Store C(0:VLEN-1, 0:3) +VSE ABX1, (CX1_ptr) +VSE ABX2, (CX2_ptr) +VSE ABX3, (CX3_ptr) + +END: +#include "rvv_sg2042_restore_registers.h" +ret diff --git a/kernels/rvv_sg2042/3/bli_sgemm_rvv_sg2042_4vx4.c b/kernels/rvv_sg2042/3/bli_sgemm_rvv_sg2042_4vx4.c new file mode 100644 index 000000000..758753920 --- /dev/null +++ b/kernels/rvv_sg2042/3/bli_sgemm_rvv_sg2042_4vx4.c @@ -0,0 +1,80 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2023, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +*/ + +#include "bli_rvv_sg2042_utils.h" + +void bli_sgemm_rvv_sg2042_asm_4vx4 + ( + intptr_t k, + const void* alpha, + const void* a, + const void* b, + const void* beta, + void* c, intptr_t rs_c, intptr_t cs_c + ); + +void bli_sgemm_rvv_sg2042_4vx4 + ( + dim_t m, + dim_t n, + dim_t k, + const void* alpha, + const void* a, + const void* b, + const void* beta, + void* c, inc_t rs_c, inc_t cs_c, + const auxinfo_t* data, + const cntx_t* cntx + ) +{ + // The assembly kernels always take native machine-sized integer arguments (hence intptr_t in the prototype above). + // dim_t and inc_t are normally machine-sized; the static assertion below checks that neither is wider than intptr_t. + bli_static_assert( sizeof(dim_t) <= sizeof(intptr_t) && + sizeof(inc_t) <= sizeof(intptr_t) ); + + // mr (vector-length dependent) is read from the context; nr is fixed at 4 for this kernel. + const inc_t mr = bli_cntx_get_blksz_def_dt( BLIS_FLOAT, BLIS_MR, cntx ); + const inc_t nr = 4; + + GEMM_UKR_SETUP_CT( s, mr, nr, false ); + + // The kernel assumes rs_c == 1 (checked below); its rs_c slot receives vlenb and cs_c is passed in bytes. 
+ assert( rs_c == 1 ); + + bli_sgemm_rvv_sg2042_asm_4vx4( k, alpha, a, b, beta, c, + get_vlenb(), cs_c * sizeof(float) ); + + GEMM_UKR_FLUSH_CT( s ); +} diff --git a/kernels/rvv_sg2042/3/bli_sgemm_rvv_sg2042_asm_4vx4.S b/kernels/rvv_sg2042/3/bli_sgemm_rvv_sg2042_asm_4vx4.S new file mode 100644 index 000000000..45e6c83d7 --- /dev/null +++ b/kernels/rvv_sg2042/3/bli_sgemm_rvv_sg2042_asm_4vx4.S @@ -0,0 +1,45 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2023, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +*/ + +#define REALNAME bli_sgemm_rvv_sg2042_asm_4vx4 +#define DATASIZE 4 +#define VTYPE e32 +#define FLOAD flw +#define FZERO(fr) fcvt.s.w fr, x0 +#define FEQ feq.s +#define VLE th.vlw.v +#define VSE th.vsw.v + +#include "bli_sdgemm_rvv_sg2042_asm_4vx4.h" diff --git a/kernels/rvv_sg2042/3/bli_zgemm_rvv_sg2042_4vx4.c b/kernels/rvv_sg2042/3/bli_zgemm_rvv_sg2042_4vx4.c new file mode 100644 index 000000000..4806cd4be --- /dev/null +++ b/kernels/rvv_sg2042/3/bli_zgemm_rvv_sg2042_4vx4.c @@ -0,0 +1,80 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2023, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include "bli_rvv_sg2042_utils.h" + +void bli_zgemm_rvv_sg2042_asm_4vx4 + ( + intptr_t k, + const void* alpha, + const void* a, + const void* b, + const void* beta, + void* c, intptr_t rs_c, intptr_t cs_c + ); + + +void bli_zgemm_rvv_sg2042_4vx4 + ( + dim_t m, + dim_t n, + dim_t k, + const void* alpha, + const void* a, + const void* b, + const void* beta, + void* c, inc_t rs_c, inc_t cs_c, + const auxinfo_t* data, + const cntx_t* cntx + ) +{ + // The assembly kernels always take native machine-sized integer arguments. + // dim_t and inc_t are normally defined as being machine-sized. If larger, assert. + bli_static_assert( sizeof(dim_t) <= sizeof(intptr_t) && + sizeof(inc_t) <= sizeof(intptr_t) ); + + // Extract vector-length dependent mr, nr that are fixed at configure time. + const inc_t mr = bli_cntx_get_blksz_def_dt( BLIS_DCOMPLEX, BLIS_MR, cntx ); + const inc_t nr = 4; + + GEMM_UKR_SETUP_CT( z, mr, nr, false ); + + // The kernel assumes rs_c == 1, and the context should not deviate from it. 
+ assert( rs_c == 1 ); + + bli_zgemm_rvv_sg2042_asm_4vx4( k, alpha, a, b, beta, c, + get_vlenb() * 2, cs_c * sizeof(dcomplex) ); + + GEMM_UKR_FLUSH_CT( z ); +} diff --git a/kernels/rvv_sg2042/3/bli_zgemm_rvv_sg2042_asm_4vx4.S b/kernels/rvv_sg2042/3/bli_zgemm_rvv_sg2042_asm_4vx4.S new file mode 100644 index 000000000..94f5607d5 --- /dev/null +++ b/kernels/rvv_sg2042/3/bli_zgemm_rvv_sg2042_asm_4vx4.S @@ -0,0 +1,44 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2023, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#define REALNAME bli_zgemm_rvv_sg2042_asm_4vx4 +#define DATASIZE 16 +#define VTYPE e64 +#define FLOAD fld +#define FZERO(fr) fcvt.d.w fr, x0 +#define FEQ feq.d +#define VLE th.vlseg2e.v +#define VSE th.vsseg2e.v + +#include "bli_czgemm_rvv_sg2042_asm_4vx4.h" diff --git a/kernels/rvv_sg2042/3/rvv_sg2042_restore_registers.h b/kernels/rvv_sg2042/3/rvv_sg2042_restore_registers.h new file mode 100644 index 000000000..bcf7d17c8 --- /dev/null +++ b/kernels/rvv_sg2042/3/rvv_sg2042_restore_registers.h @@ -0,0 +1,77 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2023, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + + +// 128-bit RISC-V is assumed to support the __riscv_xlen test macro +#if __riscv_xlen == 128 // false if !defined(__riscv_xlen) + + lq s7, 112(sp) + lq s6, 96(sp) + lq s5, 80(sp) + lq s4, 64(sp) + lq s3, 48(sp) + lq s2, 32(sp) + lq s1, 16(sp) + lq s0, 0(sp) + addi sp, sp, 128 + +// 64-bit RISC-V can be indicated by either __riscv_xlen == 64 or +// RISCV_SIZE == 64, to support toolchains which do not currently +// support __riscv_xlen. If a macro is undefined, it is considered 0. 
+#elif __riscv_xlen == 64 || RISCV_SIZE == 64 + + ld s7, 56(sp) + ld s6, 48(sp) + ld s5, 40(sp) + ld s4, 32(sp) + ld s3, 24(sp) + ld s2, 16(sp) + ld s1, 8(sp) + ld s0, 0(sp) + addi sp, sp, 64 + +#else +// else 32-bit RISC-V is assumed + + lw s7, 28(sp) + lw s6, 24(sp) + lw s5, 20(sp) + lw s4, 16(sp) + lw s3, 12(sp) + lw s2, 8(sp) + lw s1, 4(sp) + lw s0, 0(sp) + addi sp, sp, 32 + +#endif diff --git a/kernels/rvv_sg2042/3/rvv_sg2042_save_registers.h b/kernels/rvv_sg2042/3/rvv_sg2042_save_registers.h new file mode 100644 index 000000000..537c76ca6 --- /dev/null +++ b/kernels/rvv_sg2042/3/rvv_sg2042_save_registers.h @@ -0,0 +1,77 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2023, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +*/ + +// 128-bit RISC-V is assumed to support the __riscv_xlen test macro +#if __riscv_xlen == 128 // false if !defined(__riscv_xlen) + + addi sp, sp, -128 + sq s7, 112(sp) + sq s6, 96(sp) + sq s5, 80(sp) + sq s4, 64(sp) + sq s3, 48(sp) + sq s2, 32(sp) + sq s1, 16(sp) + sq s0, 0(sp) + +// 64-bit RISC-V can be indicated by either __riscv_xlen == 64 or +// RISCV_SIZE == 64, to support toolchains which do not currently +// support __riscv_xlen. If a macro is undefined, it is considered 0. +#elif __riscv_xlen == 64 || RISCV_SIZE == 64 + + addi sp, sp, -64 + sd s7, 56(sp) + sd s6, 48(sp) + sd s5, 40(sp) + sd s4, 32(sp) + sd s3, 24(sp) + sd s2, 16(sp) + sd s1, 8(sp) + sd s0, 0(sp) + +#else +// else 32-bit RISC-V is assumed + + addi sp, sp, -32 + sw s7, 28(sp) + sw s6, 24(sp) + sw s5, 20(sp) + sw s4, 16(sp) + sw s3, 12(sp) + sw s2, 8(sp) + sw s1, 4(sp) + sw s0, 0(sp) + +#endif diff --git a/kernels/rvv_sg2042/bli_kernels_rvv_sg2042.h b/kernels/rvv_sg2042/bli_kernels_rvv_sg2042.h new file mode 100644 index 000000000..3c3335619 --- /dev/null +++ b/kernels/rvv_sg2042/bli_kernels_rvv_sg2042.h @@ -0,0 +1,38 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. 
+ + Copyright (C) 2023, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +GEMM_UKR_PROT( float, s, gemm_rvv_sg2042_4vx4 ) +GEMM_UKR_PROT( double, d, gemm_rvv_sg2042_4vx4 ) +GEMM_UKR_PROT( scomplex, c, gemm_rvv_sg2042_4vx4 ) +GEMM_UKR_PROT( dcomplex, z, gemm_rvv_sg2042_4vx4 ) diff --git a/share/blis/avx.s b/share/blis/avx.s new file mode 100644 index 000000000..4b6043e87 --- /dev/null +++ b/share/blis/avx.s @@ -0,0 +1,6 @@ +// +// Test for AVX instruction set. 
+// +vzeroall +vmovapd %ymm0, %ymm1 +vmulpd %ymm0, %ymm0, %ymm1 diff --git a/share/blis/avx512dq.s b/share/blis/avx512dq.s new file mode 100644 index 000000000..7eb1c6dea --- /dev/null +++ b/share/blis/avx512dq.s @@ -0,0 +1,6 @@ +// +// Test for AVX-512dq instruction set. +// +vzeroall +vpmullq %zmm0, %zmm0, %zmm1 +vpmullw %zmm0, %zmm0, %zmm1 diff --git a/share/blis/avx512f.s b/share/blis/avx512f.s new file mode 100644 index 000000000..a07dad0ca --- /dev/null +++ b/share/blis/avx512f.s @@ -0,0 +1,7 @@ +// +// Test for AVX-512f instruction set. +// +vzeroall +vmovapd %zmm0, %zmm1 +vmulpd %zmm0, %zmm0, %zmm1 +vfmadd213pd 0x400(%rax,%rsi,8) {1to8}, %zmm1, %zmm2 diff --git a/share/blis/common.mk b/share/blis/common.mk new file mode 100644 index 000000000..584eb20f4 --- /dev/null +++ b/share/blis/common.mk @@ -0,0 +1,1365 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2014, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + +# Only include this block of code once +ifndef COMMON_MK_INCLUDED +COMMON_MK_INCLUDED := yes + + + +# +# --- CFLAGS storage functions ------------------------------------------------- +# + +# Define a function that stores the value of a variable to a different +# variable containing a specified suffix (corresponding to a configuration). +define store-var-for +$(strip $(1)).$(strip $(2)) := $($(strip $(1))) +endef + +# Define a function similar to store-var-for, except that appends instead +# of overwriting. +define append-var-for +$(strip $(1)).$(strip $(2)) += $($(strip $(1))) +endef + +# Define a function that stores the value of all of the variables in a +# make_defs.mk file to other variables with the configuration (the +# argument $(1)) added as a suffix. This function is called once from +# each make_defs.mk. Also, add the configuration to CONFIGS_INCL. 
+define store-make-defs +$(eval $(call store-var-for,CC, $(1))) +$(eval $(call store-var-for,CC_VENDOR, $(1))) +$(eval $(call store-var-for,CPPROCFLAGS,$(1))) +$(eval $(call store-var-for,CLANGFLAGS, $(1))) +$(eval $(call store-var-for,CMISCFLAGS, $(1))) +$(eval $(call store-var-for,CPICFLAGS, $(1))) +$(eval $(call store-var-for,CWARNFLAGS, $(1))) +$(eval $(call store-var-for,CDBGFLAGS, $(1))) +$(eval $(call store-var-for,COPTFLAGS, $(1))) +$(eval $(call store-var-for,CKOPTFLAGS, $(1))) +$(eval $(call store-var-for,CKVECFLAGS, $(1))) +$(eval $(call store-var-for,CROPTFLAGS, $(1))) +$(eval $(call store-var-for,CRVECFLAGS, $(1))) +CONFIGS_INCL += $(1) +endef + +# Define a function that retrieves the value of a variable for a +# given configuration. +define load-var-for +$($(strip $(1)).$(strip $(2))) +endef + + + +# +# --- CFLAGS query functions --------------------------------------------------- +# + +# Define some functions that return the appropriate CFLAGS for a given +# configuration. This assumes that the make_defs.mk files have already been +# included, which results in those values having been stored to +# configuration-qualified variables.
+ +get-noopt-cflags-for = $(strip $(CFLAGS_PRESET) \ + $(call load-var-for,CDBGFLAGS,$(1)) \ + $(call load-var-for,CWARNFLAGS,$(1)) \ + $(call load-var-for,CPICFLAGS,$(1)) \ + $(call load-var-for,CMISCFLAGS,$(1)) \ + $(call load-var-for,CLANGFLAGS,$(1)) \ + $(call load-var-for,CPPROCFLAGS,$(1)) \ + $(CTHREADFLAGS) \ + $(CINCFLAGS) \ + ) + +get-noopt-cxxflags-for = $(strip $(CXXFLAGS_PRESET) \ + $(call load-var-for,CDBGFLAGS,$(1)) \ + $(call load-var-for,CWARNFLAGS,$(1)) \ + $(call load-var-for,CPICFLAGS,$(1)) \ + $(call load-var-for,CMISCFLAGS,$(1)) \ + $(call load-var-for,CPPROCFLAGS,$(1)) \ + $(CXXLANGFLAGS) \ + $(CTHREADFLAGS) \ + $(CXXTHREADFLAGS) \ + $(CINCFLAGS) \ + ) + +get-refinit-cflags-for = $(strip $(call load-var-for,COPTFLAGS,$(1)) \ + $(call get-noopt-cflags-for,$(1)) \ + -DBLIS_CNAME=$(1) \ + -DBLIS_CNAME_UPPER=$(shell echo $(1) | tr a-z A-Z) \ + $(BUILD_ASANFLAGS) \ + $(BUILD_CPPFLAGS) \ + $(BUILD_SYMFLAGS) \ + -DBLIS_IN_REF_KERNEL=1 \ + -include $(CONFIG_PATH)/$(1)/bli_kernel_defs_$(1).h \ + ) + +get-refinit-cxxflags-for = $(strip $(call load-var-for,COPTFLAGS,$(1)) \ + $(call get-noopt-cxxflags-for,$(1)) \ + -DBLIS_CNAME=$(1) \ + -DBLIS_CNAME_UPPER=$(shell echo $(1) | tr a-z A-Z) \ + $(BUILD_ASANFLAGS) \ + $(BUILD_CPPFLAGS) \ + $(BUILD_SYMFLAGS) \ + -DBLIS_IN_REF_KERNEL=1 \ + -include $(CONFIG_PATH)/$(1)/bli_kernel_defs_$(1).h \ + ) + +get-refkern-cflags-for = $(strip $(call load-var-for,CROPTFLAGS,$(1)) \ + $(call load-var-for,CRVECFLAGS,$(1)) \ + $(call get-noopt-cflags-for,$(1)) \ + $(COMPSIMDFLAGS) \ + -DBLIS_CNAME=$(1) \ + -DBLIS_CNAME_UPPER=$(shell echo $(1) | tr a-z A-Z) \ + $(BUILD_ASANFLAGS) \ + $(BUILD_CPPFLAGS) \ + $(BUILD_SYMFLAGS) \ + -DBLIS_IN_REF_KERNEL=1 \ + -include $(CONFIG_PATH)/$(1)/bli_kernel_defs_$(1).h \ + ) + +get-refkern-cxxflags-for = $(strip $(call load-var-for,CROPTFLAGS,$(1)) \ + $(call load-var-for,CRVECFLAGS,$(1)) \ + $(call get-noopt-cxxflags-for,$(1)) \ + $(COMPSIMDFLAGS) \ + -DBLIS_CNAME=$(1) \ + 
-DBLIS_CNAME_UPPER=$(shell echo $(1) | tr a-z A-Z) \ + $(BUILD_ASANFLAGS) \ + $(BUILD_CPPFLAGS) \ + $(BUILD_SYMFLAGS) \ + -DBLIS_IN_REF_KERNEL=1 \ + -include $(CONFIG_PATH)/$(1)/bli_kernel_defs_$(1).h \ + ) + +get-config-cflags-for = $(strip $(call load-var-for,COPTFLAGS,$(1)) \ + $(call get-noopt-cflags-for,$(1)) \ + -DBLIS_CNAME=$(1) \ + -DBLIS_CNAME_UPPER=$(shell echo $(1) | tr a-z A-Z) \ + $(BUILD_ASANFLAGS) \ + $(BUILD_CPPFLAGS) \ + $(BUILD_SYMFLAGS) \ + ) + +get-config-cxxflags-for = $(strip $(call load-var-for,COPTFLAGS,$(1)) \ + $(call get-noopt-cxxflags-for,$(1)) \ + -DBLIS_CNAME=$(1) \ + -DBLIS_CNAME_UPPER=$(shell echo $(1) | tr a-z A-Z) \ + $(BUILD_ASANFLAGS) \ + $(BUILD_CPPFLAGS) \ + $(BUILD_SYMFLAGS) \ + ) + +get-frame-cflags-for = $(strip $(call load-var-for,COPTFLAGS,$(1)) \ + $(call get-noopt-cflags-for,$(1)) \ + $(BUILD_ASANFLAGS) \ + $(BUILD_CPPFLAGS) \ + $(BUILD_SYMFLAGS) \ + ) + +get-frame-cxxflags-for = $(strip $(call load-var-for,COPTFLAGS,$(1)) \ + $(call get-noopt-cxxflags-for,$(1)) \ + $(BUILD_ASANFLAGS) \ + $(BUILD_CPPFLAGS) \ + $(BUILD_SYMFLAGS) \ + ) + +get-kernel-cflags-for = $(strip $(call load-var-for,CKOPTFLAGS,$(1)) \ + $(call load-var-for,CKVECFLAGS,$(1)) \ + $(call get-noopt-cflags-for,$(1)) \ + -DBLIS_CNAME=$(1) \ + -DBLIS_CNAME_UPPER=$(shell echo $(1) | tr a-z A-Z) \ + $(BUILD_CPPFLAGS) \ + $(BUILD_SYMFLAGS) \ + ) + +get-kernel-cxxflags-for = $(strip $(call load-var-for,CKOPTFLAGS,$(1)) \ + $(call load-var-for,CKVECFLAGS,$(1)) \ + $(call get-noopt-cxxflags-for,$(1)) \ + -DBLIS_CNAME=$(1) \ + -DBLIS_CNAME_UPPER=$(shell echo $(1) | tr a-z A-Z) \ + $(BUILD_CPPFLAGS) \ + $(BUILD_SYMFLAGS) \ + ) + +# When compiling addons, we use flags similar to those of general framework +# source. This ensures that the same code can be linked and run across various +# sub-configurations. 
+get-addon-c99flags-for = $(strip $(call load-var-for,COPTFLAGS,$(1)) \ + $(call get-noopt-cflags-for,$(1)) \ + $(CADDONINCFLAGS) \ + $(BUILD_CPPFLAGS) \ + $(BUILD_SYMFLAGS) \ + ) +get-addon-cxxflags-for = $(strip $(call load-var-for,COPTFLAGS,$(1)) \ + $(call get-noopt-cxxflags-for,$(1)) \ + $(CADDONINCFLAGS) \ + $(BUILD_CPPFLAGS) \ + $(BUILD_SYMFLAGS) \ + ) +# When compiling addon kernels, we use flags similar to those of kernels +# flags, except we also include the addon header paths. +get-addon-kernel-c99flags-for = $(strip $(call load-var-for,CKOPTFLAGS,$(1)) \ + $(call load-var-for,CKVECFLAGS,$(1)) \ + $(call get-noopt-cflags-for,$(1)) \ + $(CADDONINCFLAGS) \ + $(BUILD_CPPFLAGS) \ + $(BUILD_SYMFLAGS) \ + ) + +# When compiling sandboxes, we use flags similar to those of general framework +# source. This ensures that the same code can be linked and run across various +# sub-configurations. (NOTE: If we ever switch to using refkernel or kernel +# flags, we should prevent enabling sandboxes for umbrella families by verifying +# that config_list == config_name if --enable-sandbox is given. THIS ALSO +# APPLIES TO ADDONS ABOVE.) +get-sandbox-c99flags-for = $(strip $(call load-var-for,COPTFLAGS,$(1)) \ + $(call get-noopt-cflags-for,$(1)) \ + $(CSANDINCFLAGS) \ + $(BUILD_CPPFLAGS) \ + $(BUILD_SYMFLAGS) \ + ) +get-sandbox-cxxflags-for = $(strip $(call load-var-for,COPTFLAGS,$(1)) \ + $(call get-noopt-cxxflags-for,$(1)) \ + $(CSANDINCFLAGS) \ + $(BUILD_CPPFLAGS) \ + $(BUILD_SYMFLAGS) \ + ) + +# Define a separate function that will return appropriate flags for use by +# applications that want to use the same basic flags as those used when BLIS +# was compiled. (NOTE: This is the same as the $(get-frame-cflags-for ...) 
+# function, except that it omits a few variables that contain flags exclusively +# for use when BLIS is being compiled/built: +# - BUILD_CPPFLAGS, which contains a cpp macro that confirms that BLIS +# is being built; +# - BUILD_SYMFLAGS, which contains symbol export flags that are only +# needed when a shared library is being compiled/linked; and +# - BUILD_ASANFLAGS, which contains a flag that causes the compiler to +# insert instrumentation for memory error detection.) +get-user-cflags-for = $(strip $(call load-var-for,COPTFLAGS,$(1)) \ + $(call get-noopt-cflags-for,$(1)) \ + ) + +# Define functions that return messages appropriate for each non-verbose line +# of compilation output. +get-noopt-text = "(CFLAGS for no optimization)" +get-refinit-text-for = "('$(1)' CFLAGS for ref. kernel init)" +get-refinit-cxxtext-for = "('$(1)' CXXFLAGS for ref. kernel init)" +get-refkern-text-for = "('$(1)' CFLAGS for ref. kernels)" +get-refkern-cxxtext-for = "('$(1)' CXXFLAGS for ref. kernels)" +get-config-text-for = "('$(1)' CFLAGS for config code)" +get-config-cxxtext-for = "('$(1)' CXXFLAGS for config code)" +get-frame-text-for = "('$(1)' CFLAGS for framework code)" +get-frame-cxxtext-for = "('$(1)' CXXFLAGS for framework code)" +get-kernel-text-for = "('$(1)' CFLAGS for kernels)" +get-kernel-cxxtext-for = "('$(1)' CXXFLAGS for kernels)" +get-addon-c99text-for = "('$(1)' CFLAGS for addons)" +get-addon-cxxtext-for = "('$(1)' CXXFLAGS for addons)" +get-addon-kernel-text-for = "('$(1)' CFLAGS for addon kernels)" +get-sandbox-c99text-for = "('$(1)' CFLAGS for sandboxes)" +get-sandbox-cxxtext-for = "('$(1)' CXXFLAGS for sandboxes)" + + + +# +# --- Miscellaneous helper functions ------------------------------------------- +# + +# Define functions that filter a list of filepaths $(1) that contain (or +# omit) an arbitrary substring $(2).
+files-that-contain = $(strip $(foreach f, $(1), $(if $(findstring $(2),$(f)),$(f),))) +files-that-dont-contain = $(strip $(foreach f, $(1), $(if $(findstring $(2),$(f)),,$(f)))) + +# Define a function that removes duplicate strings *without* using the sort +# function. +rm-dups = $(if $1,$(firstword $1) $(call rm-dups,$(filter-out $(firstword $1),$1))) + + + +# +# --- Include makefile configuration file -------------------------------------- +# + +# Use the current directory as the default path to the root directory for +# makefile fragments (and the configuration family's make_defs.mk), but +# allow the includer to override this value if it needs to point to an +# installation directory. +ifeq ($(strip $(SHARE_PATH)),) +SHARE_PATH := . +endif + +# Define the name of the configuration file. +CONFIG_MK_FILE := config.mk + +# Identify the base path for the root directory for makefile fragments (and +# the configuration family's make_defs.mk). We define this path in terms of +# SHARE_PATH, which gets a default value above (which is what happens for the +# top-level Makefile). If SHARE_PATH is specified by the Makefile prior to +# including common.mk, that path is used instead. This allows Makefiles for +# example code and test drivers to reference an installed prefix directory +# for situations when the build directory no longer exists. +BASE_SHARE_PATH := $(SHARE_PATH) + +# Include the configuration file. +-include $(BASE_SHARE_PATH)/$(CONFIG_MK_FILE) + + + +# +# --- Handle 'make clean' and friends without config.mk ------------------------ +# + +# Detect whether we actually got the configuration file. If we didn't, then +# it is likely that the user has not yet generated it (via configure). 
+ifeq ($(strip $(CONFIG_MK_INCLUDED)),yes) +CONFIG_MK_PRESENT := yes +IS_CONFIGURED := yes +else +CONFIG_MK_PRESENT := no +IS_CONFIGURED := no +endif + +# If we didn't get config.mk, then we need to set some basic variables so +# that make will function without error for things like 'make clean'. +ifeq ($(IS_CONFIGURED),no) + +# If this makefile fragment is being run and there is no config.mk present, +# then it's probably safe to assume that the user is currently located in the +# source distribution. +DIST_PATH := . + +# Even though they won't be used explicitly, it appears that setting these +# INSTALL_* variables to something sane (that is, not allowing them to default +# to the empty string) is necessary to prevent make from hanging, likely +# because the statements that define UNINSTALL_LIBS and UNINSTALL_HEADERS, +# when evaluated, result in running 'find' on the root directory--definitely +# something we would like to avoid. +INSTALL_LIBDIR := $(HOME)/blis/lib +INSTALL_INCDIR := $(HOME)/blis/include +INSTALL_SHAREDIR := $(HOME)/blis/share + +endif + + + +# +# --- Primary makefile variable definitions ------------------------------------ +# + +# Construct the architecture-version string, which will be used to name the +# library upon installation. +VERS_CONF := $(VERSION)-$(CONFIG_NAME) + +# All makefile fragments in the tree will have this name. +FRAGMENT_MK := .fragment.mk + +# Locations of important files. +BUILD_DIR := build +CONFIG_DIR := config +ifeq ($(FRAME_DIR),) +FRAME_DIR := frame +endif +REFKERN_DIR := ref_kernels +KERNELS_DIR := kernels +ADDON_DIR := addon +SANDBOX_DIR := sandbox +OBJ_DIR := obj +LIB_DIR := lib +INCLUDE_DIR := include +BLASTEST_DIR := blastest +TESTSUITE_DIR := testsuite + +VEND_DIR := vendor +VEND_CPP_DIR := $(VEND_DIR)/cpp +VEND_TESTCPP_DIR := $(VEND_DIR)/testcpp + +# The filename suffix for reference kernels. +REFNM := ref + +# Source suffixes.
+CONFIG_SRC_SUFS := c +KERNELS_SRC_SUFS := c s S +ifneq ($(findstring hpx,$(THREADING_MODEL)),) +FRAME_SRC_SUFS := c cpp +else +FRAME_SRC_SUFS := c +endif + +ADDON_C99_SUFS := c +ADDON_CXX_SUFS := cc cpp cxx +ADDON_SRC_SUFS := $(ADDON_C99_SUFS) $(ADDON_CXX_SUFS) + +SANDBOX_C99_SUFS := c +SANDBOX_CXX_SUFS := cc cpp cxx +SANDBOX_SRC_SUFS := $(SANDBOX_C99_SUFS) $(SANDBOX_CXX_SUFS) + +# Header suffixes. +FRAME_H99_SUFS := h +FRAME_HDR_SUFS := $(FRAME_H99_SUFS) + +ADDON_H99_SUFS := h +ADDON_HXX_SUFS := hh hpp hxx +ADDON_HDR_SUFS := $(ADDON_H99_SUFS) $(ADDON_HXX_SUFS) + +SANDBOX_H99_SUFS := h +SANDBOX_HXX_SUFS := hh hpp hxx +SANDBOX_HDR_SUFS := $(SANDBOX_H99_SUFS) $(SANDBOX_HXX_SUFS) + +# Combine all header suffixes and remove duplicates via sort(). +ALL_HDR_SUFS := $(sort $(FRAME_HDR_SUFS) \ + $(ADDON_HDR_SUFS) \ + $(SANDBOX_HDR_SUFS) ) + +ALL_H99_SUFS := $(sort $(FRAME_H99_SUFS) \ + $(ADDON_H99_SUFS) \ + $(SANDBOX_H99_SUFS) ) + +# The names of scripts that check output from the BLAS test drivers and +# BLIS test suite. +BLASTEST_CHECK := check-blastest.sh +TESTSUITE_CHECK := check-blistest.sh + +# The names of the testsuite input/configuration files. +TESTSUITE_CONF_GEN := input.general +TESTSUITE_CONF_OPS := input.operations +TESTSUITE_FAST_GEN := input.general.fast +TESTSUITE_FAST_OPS := input.operations.fast +TESTSUITE_MIXD_GEN := input.general.mixed +TESTSUITE_MIXD_OPS := input.operations.mixed +TESTSUITE_SALT_GEN := input.general.salt +TESTSUITE_SALT_OPS := input.operations.salt +TESTSUITE_OUT_FILE := output.testsuite + +# CHANGELOG file. +CHANGELOG := CHANGELOG + +# Something for OS X so that echo -n works as expected. +SHELL := bash + +# Construct paths to the four primary directories of source code: +# the config directory, general framework code, reference kernel code, +# and optimized kernel code. Also process paths for addon and sandbox +# directories. 
+CONFIG_PATH := $(DIST_PATH)/$(CONFIG_DIR) +FRAME_PATH := $(DIST_PATH)/$(FRAME_DIR) +REFKERN_PATH := $(DIST_PATH)/$(REFKERN_DIR) +KERNELS_PATH := $(DIST_PATH)/$(KERNELS_DIR) +ADDON_PATH := $(DIST_PATH)/$(ADDON_DIR) +SANDBOX_PATH := $(DIST_PATH)/$(SANDBOX_DIR) +BUILD_PATH := $(DIST_PATH)/$(BUILD_DIR) + +# Construct paths to some optional C++ template headers contributed by AMD. +VEND_CPP_PATH := $(DIST_PATH)/$(VEND_CPP_DIR) +VEND_TESTCPP_PATH := $(DIST_PATH)/$(VEND_TESTCPP_DIR) + +# Construct paths to the makefile fragments for the four primary directories +# of source code: the config directory, general framework code, reference +# kernel code, and optimized kernel code. +CONFIG_FRAG_PATH := ./obj/$(CONFIG_NAME)/$(CONFIG_DIR) +FRAME_FRAG_PATH := ./obj/$(CONFIG_NAME)/$(FRAME_DIR) +REFKERN_FRAG_PATH := ./obj/$(CONFIG_NAME)/$(REFKERN_DIR) +KERNELS_FRAG_PATH := ./obj/$(CONFIG_NAME)/$(KERNELS_DIR) +ADDON_FRAG_PATH := ./obj/$(CONFIG_NAME)/$(ADDON_DIR) +SANDBOX_FRAG_PATH := ./obj/$(CONFIG_NAME)/$(SANDBOX_DIR) + + + +# +# --- Library name and local paths --------------------------------------------- +# + +# Use lib/CONFIG_NAME as the default path to the local library files, but +# allow the includer to override this value if it needs to point to an +# installation directory. +ifeq ($(strip $(LIB_PATH)),) +LIB_PATH := $(LIB_DIR)/$(CONFIG_NAME) +endif + +# Identify the base path for the intermediate library directory. We define +# this path in terms of LIB_PATH, which gets a default value above (which is +# what happens for the top-level Makefile). If LIB_PATH is specified by the +# Makefile prior to including common.mk, that path is used instead. This +# allows Makefiles for example code and test drivers to reference an installed +# prefix directory for situations when the build directory no longer exists. +BASE_LIB_PATH := $(LIB_PATH) + +# The base name of the BLIS library that we will build.
+LIBBLIS := libblis + +# The shared (dynamic) library file suffix is different for Linux and OS X. +ifeq ($(OS_NAME),Darwin) +SHLIB_EXT := dylib +else ifeq ($(IS_WIN),yes) +ifeq ($(IS_MSVC),no) +SHLIB_EXT := dll.a +else +SHLIB_EXT := lib +endif +else +SHLIB_EXT := so +endif + +# Note: These names will be modified later to include the configuration and +# version strings. +LIBBLIS_A := $(LIBBLIS).a +LIBBLIS_SO := $(LIBBLIS).$(SHLIB_EXT) + +# Append the base library path to the library names. +LIBBLIS_A_PATH := $(BASE_LIB_PATH)/$(LIBBLIS_A) +LIBBLIS_SO_PATH := $(BASE_LIB_PATH)/$(LIBBLIS_SO) + +# Create a filepath to a local symlink to the soname--that is, the same as +# LIBBLIS_SO_PATH except with the .so major version number. Since the shared +# library lists its soname as 'libblis.so.n', where n is the .so major version +# number, a symlink in BASE_LIB_PATH is needed so that ld can find the local +# shared library when the testsuite is run via 'make test' or 'make check'. + +ifeq ($(OS_NAME),Darwin) +# OS X shared library extensions. +LIBBLIS_SO_MAJ_EXT := $(SO_MAJOR).$(SHLIB_EXT) +LIBBLIS_SO_MMB_EXT := $(SO_MMB).$(SHLIB_EXT) +else ifeq ($(IS_WIN),yes) +# Windows shared library extension. +LIBBLIS_SO_MAJ_EXT := $(SO_MAJOR).dll +LIBBLIS_SO_MMB_EXT := +else +# Linux shared library extensions. +LIBBLIS_SO_MAJ_EXT := $(SHLIB_EXT).$(SO_MAJOR) +LIBBLIS_SO_MMB_EXT := $(SHLIB_EXT).$(SO_MMB) +endif +LIBBLIS_SONAME := $(LIBBLIS).$(LIBBLIS_SO_MAJ_EXT) +LIBBLIS_SO_MAJ_PATH := $(BASE_LIB_PATH)/$(LIBBLIS_SONAME) + +# Construct the output path when building a shared library. +# NOTE: This code and the code immediately above is a little curious and +# perhaps could be refactored (carefully). 
+ifeq ($(IS_WIN),yes) +LIBBLIS_SO_OUTPUT_NAME := $(LIBBLIS_SO_MAJ_PATH) +else +LIBBLIS_SO_OUTPUT_NAME := $(LIBBLIS_SO_PATH) +endif + + + +# +# --- Utility program definitions ---------------------------------------------- +# + +SH := /bin/sh +MV := mv +MKDIR := mkdir -p +RM_F := rm -f +RM_RF := rm -rf +SYMLINK := ln -sf +FIND := find +GREP := grep +EGREP := grep -E +XARGS := xargs +INSTALL := install -c +DEVNULL := /dev/null + +# Script for creating a monolithic header file. +#FLATTEN_H := $(DIST_PATH)/build/flatten-headers.sh +FLATTEN_H := $(PYTHON) $(DIST_PATH)/build/flatten-headers.py + +# Default archiver flags. +ARFLAGS := cr + +# Used to refresh CHANGELOG. +GIT := git +GIT_LOG := $(GIT) log --decorate + +# Define the locations of a script to generate a list of shared library symbols +# within BLIS as well as the symbol file itself. +GEN_SYMS := $(BUILD_PATH)/gen-libblis-symbols.sh +SYM_FILE := $(BUILD_PATH)/libblis-symbols.def + + + +# +# --- Default linker definitions ----------------------------------------------- +# + +# NOTE: This section needs to reside before the inclusion of make_defs.mk +# files (just below), as most configurations' make_defs.mk don't tinker +# with things like LDFLAGS, but some do (or may), in which case they can +# manually override whatever they need. + +# Define the external libraries we may potentially need at link-time. +ifeq ($(IS_MSVC),yes) +LIBM := +else +LIBM := -lm +endif +LIBMEMKIND := -lmemkind + +# Default linker flags. +# NOTE: -lpthread is needed unconditionally because BLIS uses pthread_once() +# to initialize itself in a thread-safe manner. The one exception to this +# rule: if --disable-system is given at configure-time, LIBPTHREAD is empty. +LDFLAGS := $(LDFLAGS_PRESET) $(LIBM) $(LIBPTHREAD) + +# Add libmemkind to the link-time flags, if it was enabled at configure-time. +ifeq ($(MK_ENABLE_MEMKIND),yes) +LDFLAGS += $(LIBMEMKIND) +endif + +# Never use libm with Intel compilers. 
+ifeq ($(CC_VENDOR),icc) +LDFLAGS := $(filter-out $(LIBM),$(LDFLAGS)) +endif + +# Never use libmemkind with Intel SDE. +ifeq ($(DEBUG_TYPE),sde) +LDFLAGS := $(filter-out $(LIBMEMKIND),$(LDFLAGS)) +endif + +# If AddressSanitizer is enabled, add the compiler flag to LDFLAGS. +ifeq ($(MK_ENABLE_ASAN),yes) +LDFLAGS += -fsanitize=address +endif + +# Specify the shared library's 'soname' field. +# NOTE: The flag for creating shared objects is different for Linux and OS X. +ifeq ($(OS_NAME),Darwin) +# OS X shared library link flags. +SOFLAGS := -dynamiclib +ifeq ($(MK_ENABLE_RPATH),yes) +SOFLAGS += -Wl,-install_name,@rpath/$(LIBBLIS_SONAME) +else +SOFLAGS += -Wl,-install_name,$(libdir)/$(LIBBLIS_SONAME) +endif +else +SOFLAGS := -shared +ifeq ($(IS_WIN),yes) +# Windows shared library link flags. +ifeq ($(IS_MSVC),yes) +SOFLAGS += -Wl,-implib:$(BASE_LIB_PATH)/$(LIBBLIS).lib +else +SOFLAGS += -Wl,--out-implib,$(BASE_LIB_PATH)/$(LIBBLIS).dll.a +endif +else +# Linux shared library link flags. +SOFLAGS += -Wl,-soname,$(LIBBLIS_SONAME) +endif +endif + +# Decide which library to link to for things like the testsuite and BLIS test +# drivers. We default to the static library, unless only the shared library was +# enabled, in which case we use the shared library. +LIBBLIS_L := $(LIBBLIS_A) +LIBBLIS_LINK := $(LIBBLIS_A_PATH) +ifeq ($(MK_ENABLE_SHARED),yes) +ifeq ($(MK_ENABLE_STATIC),no) +LIBBLIS_L := $(LIBBLIS_SO) +LIBBLIS_LINK := $(LIBBLIS_SO_PATH) +ifeq ($(IS_WIN),no) +# For Linux and OS X: set rpath property of shared object. +ifeq ($(OS_NAME),Darwin) +# rpath for test_libblis.x +LDFLAGS += -Wl,-rpath,@executable_path/$(BASE_LIB_PATH) +# rpath for BLAS tests +LDFLAGS += -Wl,-rpath,@executable_path/../../../$(BASE_LIB_PATH) +else +# rpath for test_libblis.x +LDFLAGS += -Wl,-rpath,'$$ORIGIN/$(BASE_LIB_PATH)' +# rpath for BLAS tests +LDFLAGS += -Wl,-rpath,'$$ORIGIN/../../../$(BASE_LIB_PATH)' +endif +endif +endif +# On windows, use the shared library even if static is created. 
+ifeq ($(IS_WIN),yes) +LIBBLIS_L := $(LIBBLIS_SO) +LIBBLIS_LINK := $(LIBBLIS_SO_PATH) +endif +endif + + + +# +# --- Include makefile definitions file ---------------------------------------- +# + +# Define the name of the file containing build and architecture-specific +# makefile definitions. +MAKE_DEFS_FILE := make_defs.mk + +# Assemble a list of all configuration family members, including the +# configuration family name itself. Note that sort() will remove duplicates +# for situations where CONFIG_NAME is present in CONFIG_LIST, such as would +# be the case for singleton families. +CONFIG_LIST_FAM := $(sort $(strip $(CONFIG_LIST) $(CONFIG_NAME))) + +# Construct the paths to the makefile definitions files, each of which +# resides in a separate configuration sub-directory. We use CONFIG_LIST_FAM +# since we might need the makefile definitions associated with the +# configuration family (if it is an umbrella family). +# NOTE: We use the prefix $(BASE_SHARE_PATH)/$(CONFIG_DIR)/ instead of +# $(CONFIG_PATH) so that make_defs.mk can be found when it is installed, +# provided the caller defined SHARE_PATH to that install directory. +CONFIG_PATHS := $(addprefix $(BASE_SHARE_PATH)/$(CONFIG_DIR)/, \ + $(CONFIG_LIST_FAM)) +MAKE_DEFS_MK_PATHS := $(addsuffix /$(MAKE_DEFS_FILE), $(CONFIG_PATHS)) + +# Initialize the list of included (found) configurations to empty. +CONFIGS_INCL := + +# Include the makefile definitions files implied by the list of configurations. +-include $(MAKE_DEFS_MK_PATHS) + +# Detect whether we actually got all of the make definitions files. If +# we didn't, then maybe a configuration is mislabeled or missing. The +# check-env-make-defs target checks ALL_MAKE_DEFS_MK_PRESENT and outputs +# an error message if it is set to 'no'. +# NOTE: We use CONFIG_LIST_FAM as the expected list of configurations. +# This combines CONFIG_NAME with CONFIG_LIST. 
The inclusion of CONFIG_NAME +# is needed for situations where the configuration family is an umbrella +# family (e.g. 'intel64'), since families have separate make_defs.mk files. +CONFIGS_EXPECTED := $(CONFIG_LIST_FAM) +ifeq ($(sort $(strip $(CONFIGS_INCL))), \ + $(sort $(strip $(CONFIGS_EXPECTED)))) +ALL_MAKE_DEFS_MK_PRESENT := yes +else +ALL_MAKE_DEFS_MK_PRESENT := no +endif + + + +# +# --- Configuration-agnostic flags --------------------------------------------- +# + +# --- Linker program --- + +# Use whatever compiler was chosen. A C++ compiler must be used if HPX is enabled. +ifneq ($(findstring hpx,$(THREADING_MODEL)),) +LINKER := $(CXX) +else +LINKER := $(CC) +endif + +# --- Warning flags --- + +CWARNFLAGS := + +# Do not allow functions with implicit declarations to be called +ifneq ($(CC_VENDOR),ibm) +CWARNFLAGS += -Werror=implicit-function-declaration +endif + +# Disable unused function warnings and stop compiling on first error for +# all compilers that accept such options: gcc, clang, and icc. +ifneq ($(CC_VENDOR),ibm) +ifneq ($(CC_VENDOR),nvc) +CWARNFLAGS += -Wall -Wno-unused-function -Wfatal-errors +else +CWARNFLAGS += -Wall -Wno-unused-function +endif +endif + +# Disable tautological comparison warnings in clang. +ifeq ($(CC_VENDOR),clang) +CWARNFLAGS += -Wno-tautological-compare -Wno-pass-failed +endif + +# Disable other annoying warnings. +ifeq ($(CC_VENDOR),clang) +CWARNFLAGS += +else +ifeq ($(CC_VENDOR),gcc) +# The '-Wno-maybe-uninitialized' option makes me nervous. Let's temporarily +# disable for now. -FGVZ +#CWARNFLAGS += -Wno-maybe-uninitialized -Wno-comment +CWARNFLAGS += -Wno-comment +endif +endif + +$(foreach c, $(CONFIG_LIST_FAM), $(eval $(call append-var-for,CWARNFLAGS,$(c)))) + +# --- Position-independent code flags (shared libraries only) --- + +# Note: Avoid -fPIC flags for Windows builds since all code is position- +# independent.
+ifeq ($(IS_MSVC),yes) +CPICFLAGS := +endif +$(foreach c, $(CONFIG_LIST_FAM), $(eval $(call store-var-for,CPICFLAGS,$(c)))) + +# --- Symbol exporting flags (shared libraries only) --- + +ifeq ($(MK_ENABLE_SHARED),yes) + +# NOTE: These flags are only applied when building BLIS and not used by +# applications that import BLIS compilation flags via the +# $(get-user-cflags-for ...) function. + +# Determine default export behavior / visibility of symbols for gcc. +ifeq ($(CC_VENDOR),gcc) +ifeq ($(IS_WIN),yes) +ifeq ($(EXPORT_SHARED),all) +BUILD_SYMFLAGS := -Wl,--export-all-symbols, -Wl,--enable-auto-import +else # ifeq ($(EXPORT_SHARED),public) +BUILD_SYMFLAGS := -Wl,--exclude-all-symbols +endif +else # ifeq ($(IS_WIN),no) +ifeq ($(EXPORT_SHARED),all) +# Export all symbols by default. +BUILD_SYMFLAGS := -fvisibility=default +else # ifeq ($(EXPORT_SHARED),public) +# Hide all symbols by default and export only those that have been annotated +# as needing to be exported. +BUILD_SYMFLAGS := -fvisibility=hidden +endif +endif +endif + +# Determine default export behavior / visibility of symbols for icc. +# NOTE: The Windows branches have been omitted since we currently make no +# effort to support Windows builds via icc (only gcc/clang via AppVeyor). +ifeq ($(CC_VENDOR),icc) +ifeq ($(EXPORT_SHARED),all) +# Export all symbols by default. +BUILD_SYMFLAGS := -fvisibility=default +else # ifeq ($(EXPORT_SHARED),public) +# Hide all symbols by default and export only those that have been annotated +# as needing to be exported. +BUILD_SYMFLAGS := -fvisibility=hidden +endif +endif + +# Determine default export behavior / visibility of symbols for clang. 
+ifeq ($(CC_VENDOR),clang) +ifeq ($(IS_WIN),yes) +ifeq ($(IS_MSVC),no) +# This is a clang build targeting the MinGW-w64 environment. NOTE(review): the flags set in this branch appear to be overwritten by the unconditional EXPORT_SHARED conditional that follows the IS_MSVC endif below -- confirm intent. +ifeq ($(EXPORT_SHARED),all) +BUILD_SYMFLAGS := -Wl,--export-all-symbols, -Wl,--enable-auto-import +else # ifeq ($(EXPORT_SHARED),public) +BUILD_SYMFLAGS := -Wl,--exclude-all-symbols +endif +endif # ifeq ($(IS_MSVC),no) +ifeq ($(EXPORT_SHARED),all) +# NOTE: clang on Windows does not appear to support exporting all symbols +# by default, and therefore we ignore the value of EXPORT_SHARED. +BUILD_SYMFLAGS := +else # ifeq ($(EXPORT_SHARED),public) +# NOTE: The default behavior of clang on Windows is to hide all symbols +# and only export functions and other declarations that have been annotated +# as needing to be exported. +BUILD_SYMFLAGS := +endif +else # ifeq ($(IS_WIN),no) +ifeq ($(EXPORT_SHARED),all) +# Export all symbols by default. +BUILD_SYMFLAGS := -fvisibility=default +else # ifeq ($(EXPORT_SHARED),public) +# Hide all symbols by default and export only those that have been annotated +# as needing to be exported. +BUILD_SYMFLAGS := -fvisibility=hidden +endif +endif +endif + +else #ifeq ($(MK_ENABLE_SHARED),no) + +# Don't modify CPICFLAGS for the various configuration family members. +# Don't use any special symbol export flags. +BUILD_SYMFLAGS := + +endif + +# --- Language flags --- + +# Enable C99. +CLANGFLAGS := -std=c99 +$(foreach c, $(CONFIG_LIST_FAM), $(eval $(call append-var-for,CLANGFLAGS,$(c)))) + +# Enable C++11, or C++17 if HPX threading is enabled. +# If building a plugin, do not set any default C++ standard. +ifeq ($(PLUGIN_NAME),) +ifneq ($(findstring hpx,$(THREADING_MODEL)),) +CXXLANGFLAGS := -std=c++17 +else +CXXLANGFLAGS := -std=c++11 +endif +else +CXXLANGFLAGS := +endif + +# --- C Preprocessor flags --- + +# Enable clock_gettime() in time.h. +CPPROCFLAGS := -D_POSIX_C_SOURCE=200112L +# Enable ip_mreq on macOS which is needed for ASIO which is needed for HPX.
+ifeq ($(OS_NAME),Darwin) +CPPROCFLAGS += -D_DARWIN_C_SOURCE +endif +$(foreach c, $(CONFIG_LIST_FAM), $(eval $(call append-var-for,CPPROCFLAGS,$(c)))) + +# --- AddressSanitizer flags --- + +ifeq ($(MK_ENABLE_ASAN),yes) +BUILD_ASANFLAGS := -fsanitize=address +else +BUILD_ASANFLAGS := +endif + +# --- Threading flags --- + +# NOTE: We don't have to explicitly omit -pthread when --disable-system is given +# since that option forces --enable-threading=single, and thus -pthread never +# gets added to begin with. + +CTHREADFLAGS := +CXXTHREADFLAGS := + +ifeq ($(CC_VENDOR),gcc) +#ifneq ($(findstring auto,$(THREADING_MODEL)),) +#THREADING_MODEL := openmp +#endif +ifneq ($(findstring openmp,$(THREADING_MODEL)),) +CTHREADFLAGS += -fopenmp +LDFLAGS += -fopenmp +endif +ifneq ($(findstring pthreads,$(THREADING_MODEL)),) +CTHREADFLAGS += -pthread +LDFLAGS += $(LIBPTHREAD) +endif +endif + +ifeq ($(CC_VENDOR),icc) +#ifneq ($(findstring auto,$(THREADING_MODEL)),) +#THREADING_MODEL := openmp +#endif +ifneq ($(findstring openmp,$(THREADING_MODEL)),) +CTHREADFLAGS += -fopenmp +LDFLAGS += -fopenmp +endif +ifneq ($(findstring pthreads,$(THREADING_MODEL)),) +CTHREADFLAGS += -pthread +LDFLAGS += $(LIBPTHREAD) +endif +endif + +ifeq ($(CC_VENDOR),clang) +#ifneq ($(findstring auto,$(THREADING_MODEL)),) +#THREADING_MODEL := pthreads +#endif +ifneq ($(findstring openmp,$(THREADING_MODEL)),) +CTHREADFLAGS += -fopenmp +LDFLAGS += -fopenmp +endif +ifneq ($(findstring pthreads,$(THREADING_MODEL)),) +CTHREADFLAGS += -pthread +LDFLAGS += $(LIBPTHREAD) +endif +endif + +# Threading flags for HPX. 
+ifneq ($(findstring hpx,$(THREADING_MODEL)),) +HPX_CXXFLAGS := $(shell pkg-config --cflags hpx_component) +HPX_LDFLAGS := $(filter-out -shared,$(shell pkg-config --libs hpx_component)) +CTHREADFLAGS += $(filter-out -std=%,$(HPX_CXXFLAGS)) +LDFLAGS += $(HPX_LDFLAGS) +ifeq ($(OS_NAME),Darwin) +RPATH_PREFIX := -Wl,-rpath, +LDFLAGS += $(patsubst -L%,$(RPATH_PREFIX)%,$(filter -L%,$(HPX_LDFLAGS))) +endif +endif + +# --- #pragma omp simd flags (used for reference kernels only) --- + +ifeq ($(PRAGMA_OMP_SIMD),yes) +ifeq ($(CC_VENDOR),gcc) +COMPSIMDFLAGS := -fopenmp-simd +else +ifeq ($(CC_VENDOR),clang) +COMPSIMDFLAGS := -fopenmp-simd +else +ifeq ($(CC_VENDOR),icc) +COMPSIMDFLAGS := -qopenmp-simd +endif +endif +endif +else # ifeq ($(PRAGMA_OMP_SIMD),no) +COMPSIMDFLAGS := +endif + + + +# +# --- Adjust verbosity level manually using make V=[0,1] ----------------------- +# + +ifeq ($(V),1) +ENABLE_VERBOSE := yes +BLIS_ENABLE_TEST_OUTPUT := yes +endif + +ifeq ($(V),0) +ENABLE_VERBOSE := no +BLIS_ENABLE_TEST_OUTPUT := no +endif + + + +# +# --- Append OS-specific libraries to LDFLAGS ---------------------------------- +# + +ifeq ($(OS_NAME),Linux) +# Exclude -lrt on Android by detecting Bionic. +# printf *must* be used here rather than echo -e +BIONIC := $(findstring bionic,$(shell printf "\#ifdef __BIONIC__\nbionic\n\#endif" | $(CC) -E -)) +ifeq (,$(BIONIC)) +LDFLAGS += -lrt +endif +endif + + + +# +# --- LDFLAGS cleanup ---------------------------------------------------------- +# + + + +# +# --- Include makefile fragments ----------------------------------------------- +# + +# Initialize our list of directory paths to makefile fragments with the empty +# list. This variable will accumulate all of the directory paths in which +# makefile fragments reside. +FRAGMENT_DIR_PATHS := + +# Initialize our makefile variables that source code files will be accumulated +# into by the makefile fragments. This initialization is very important! 
These +# variables will end up with weird contents if we don't initialize them to +# empty prior to recursively including the makefile fragments. +MK_CONFIG_SRC := +MK_KERNELS_SRC := +MK_REFKERN_SRC := +MK_FRAME_SRC := +MK_ADDON_SRC := +MK_SANDBOX_SRC := + +# -- config -- + +# Construct paths to each of the sub-configurations specified in the +# configuration list. Note that we use CONFIG_LIST_FAM, which already +# has CONFIG_NAME included (with duplicates removed). +CONFIG_PATHS := $(addprefix $(CONFIG_FRAG_PATH)/, $(CONFIG_LIST_FAM)) + +# This variable is used by the include statements as they recursively include +# one another. For the 'config' directory, we initialize it to that directory +# in preparation to include the fragments in the configuration sub-directory. +PARENT_SRC_PATH := $(CONFIG_PATH) +PARENT_PATH := $(CONFIG_FRAG_PATH) + +# Recursively include the makefile fragments in each of the sub-configuration +# directories. +-include $(addsuffix /$(FRAGMENT_MK), $(CONFIG_PATHS)) + +# -- kernels -- + +# Construct paths to each of the kernel sets required by the sub-configurations +# in the configuration list. +KERNEL_PATHS := $(addprefix $(KERNELS_FRAG_PATH)/, $(KERNEL_LIST)) + +# This variable is used by the include statements as they recursively include +# one another. For the 'kernels' directory, we initialize it to that directory +# in preparation to include the fragments in the configuration sub-directory. +PARENT_SRC_PATH := $(KERNELS_PATH) +PARENT_PATH := $(KERNELS_FRAG_PATH) + +# Recursively include the makefile fragments in each of the kernels sub- +# directories. +-include $(addsuffix /$(FRAGMENT_MK), $(KERNEL_PATHS)) + +# -- ref_kernels -- +# -- frame -- + +# This variable is used by the include statements as they recursively include +# one another. For the framework and reference kernel source trees (ie: the +# 'frame' and 'ref_kernels' directories), we initialize it to the top-level +# directory since that is its parent. 
+PARENT_SRC_PATH := $(DIST_PATH) +PARENT_PATH := $(OBJ_DIR)/$(CONFIG_NAME) + +# Recursively include all the makefile fragments in the directories for the +# reference kernels and portable framework. +-include $(addsuffix /$(FRAGMENT_MK), $(REFKERN_FRAG_PATH)) +-include $(addsuffix /$(FRAGMENT_MK), $(FRAME_FRAG_PATH)) + +# -- addon -- + +# Construct paths to each addon. +# NOTE: If $(ADDON_LIST) is empty (because no addon was enabled at configure- +# time) then $(ADDON_PATHS) will also be empty, which will cause no fragments +# to be included. +ADDON_PATHS := $(addprefix $(ADDON_FRAG_PATH)/, $(ADDON_LIST)) + +# This variable is used by the include statements as they recursively include +# one another. For the 'addons' directory, we initialize it to that directory +# in preparation to include the fragments in the configuration sub-directory. +PARENT_SRC_PATH := $(ADDON_PATH) +PARENT_PATH := $(ADDON_FRAG_PATH) + +# Recursively include the makefile fragments in each of the addons sub- +# directories. +-include $(addsuffix /$(FRAGMENT_MK), $(ADDON_PATHS)) + +# -- sandbox -- + +# Construct paths to each sandbox. (At present, there can be only one.) +# NOTE: If $(SANDBOX) is empty (because no sandbox was enabled at configure- +# time) then $(SANDBOX_PATHS) will also be empty, which will cause no +# fragments to be included. +SANDBOX_PATHS := $(addprefix $(SANDBOX_FRAG_PATH)/, $(SANDBOX)) + +# This variable is used by the include statements as they recursively include +# one another. For the 'sandbox' directory, we initialize it to that directory +# in preparation to include the fragments in the configuration sub-directory. +PARENT_SRC_PATH := $(SANDBOX_PATH) +PARENT_PATH := $(SANDBOX_FRAG_PATH) + +# Recursively include the makefile fragments in the sandbox sub-directory. 
+-include $(addsuffix /$(FRAGMENT_MK), $(SANDBOX_PATHS)) + +# -- post-processing -- + +# Create a list of the makefile fragments using the variable into which each +# of the above include statements accumulated their directory paths. +MAKEFILE_FRAGMENTS := $(addsuffix /$(FRAGMENT_MK), $(FRAGMENT_DIR_PATHS)) + +# Detect whether we actually got any makefile fragments. If we didn't, then it +# is likely that the user has not yet generated them (via configure). +ifeq ($(strip $(MAKEFILE_FRAGMENTS)),) +MAKEFILE_FRAGMENTS_PRESENT := no +else +MAKEFILE_FRAGMENTS_PRESENT := yes +endif + + +# +# --- Important sets of header files and paths --------------------------------- +# + +# Define a function that will expand all of the directory paths given in $(1) +# to actual filepaths using the list of suffixes provided in $(2). +get-filepaths = $(strip $(foreach path, $(1), \ + $(foreach suf, $(2), \ + $(wildcard $(path)/*.$(suf)) \ + ) ) ) + +# Define a function that will expand all of the directory paths given in $(1) +# to actual filepaths using the list of suffixes provided in $(2), taking only +# the first expansion from each directory with at least one file matching +# the current suffix. Finally, strip the filenames from all resulting files, +# returning only the directory paths. +get-dirpaths = $(dir $(foreach path, $(1), \ + $(firstword \ + $(foreach suf, $(2), \ + $(wildcard $(path)/*.$(suf)) \ + ) ) ) ) + +# We'll use three directory lists. The first is a list of all of the directories +# in which makefile fragments were generated, plus the current directory. (The +# current directory is needed so we include bli_config.h and bli_addon.h in the +# processing of header files.) The second and third are subsets of the first +# that begins with the addon and sandbox root paths, respectively. +ALLFRAG_DIR_PATHS := . 
$(FRAGMENT_DIR_PATHS) +ADDON_DIR_PATHS := $(filter $(ADDON_PATH)/%,$(ALLFRAG_DIR_PATHS)) +SANDBOX_DIR_PATHS := $(filter $(SANDBOX_PATH)/%,$(ALLFRAG_DIR_PATHS)) + +ALL_H99_FILES := $(call get-filepaths,$(ALLFRAG_DIR_PATHS),$(ALL_H99_SUFS)) +FRAME_H99_FILES := $(filter-out $(ADDON_PATH)/%, \ + $(filter-out $(SANDBOX_PATH)/%, \ + $(ALL_H99_FILES) \ + ) ) + +ALL_H99_DIRPATHS := $(call get-dirpaths,$(ALLFRAG_DIR_PATHS),$(ALL_H99_SUFS)) + +ADDON_H99_FILES := $(call get-filepaths,$(ADDON_DIR_PATHS),$(ADDON_H99_SUFS)) +ADDON_HXX_FILES := $(call get-filepaths,$(ADDON_DIR_PATHS),$(ADDON_HXX_SUFS)) +ADDON_HDR_DIRPATHS := $(call get-dirpaths,$(ADDON_DIR_PATHS),$(ALL_HDR_SUFS)) + +SANDBOX_H99_FILES := $(call get-filepaths,$(SANDBOX_DIR_PATHS),$(SANDBOX_H99_SUFS)) +SANDBOX_HXX_FILES := $(call get-filepaths,$(SANDBOX_DIR_PATHS),$(SANDBOX_HXX_SUFS)) +SANDBOX_HDR_DIRPATHS := $(call get-dirpaths,$(SANDBOX_DIR_PATHS),$(ALL_HDR_SUFS)) + + + +# +# --- blis.h header definitions ------------------------------------------------ +# + +# Use include/CONFIG_NAME as the default path to the local header files, but +# allow the includer to override this value if it needs to point to an +# installation directory. +ifeq ($(strip $(INC_PATH)),) +INC_PATH := $(INCLUDE_DIR)/$(CONFIG_NAME) +endif + +# Identify the base path for the intermediate include directory. We define +# this path in terms of INC_PATH, which gets a default value above (which is +# what happens for the top-level Makefile). If INC_PATH is specified by the +# Makefile prior to including common.mk, that path is used instead. This +# allows Makefiles for example code and test drivers to reference an installed +# prefix directory for situations when the build directory no longer exists. +BASE_INC_PATH := $(INC_PATH) + +# Isolate the path to blis.h by filtering the file from the list of framework +# header files. 
+BLIS_H := blis.h +BLIS_H_SRC_PATH := $(filter %/$(BLIS_H), $(FRAME_H99_FILES)) + +# Construct the path to what will be the intermediate flattened/monolithic +# blis.h file. +BLIS_H_FLAT := $(BASE_INC_PATH)/$(BLIS_H) + +# Construct the path to the helper blis.h file that will reside one directory +# up from the installed copy of blis.h. +HELP_BLIS_H_PATH := $(BUILD_DIR)/$(BLIS_H) + + +# +# --- cblas.h header definitions ----------------------------------------------- +# + +# Isolate the path to cblas.h by filtering the file from the list of framework +# header files, and then strip the filename to obtain the directory in which +# cblas.h resides. +CBLAS_H := cblas.h +CBLAS_H_SRC_PATH := $(filter %/$(CBLAS_H), $(FRAME_H99_FILES)) +CBLAS_H_DIRPATH := $(dir $(CBLAS_H_SRC_PATH)) + +# Construct the path to what will be the intermediate flattened/monolithic +# cblas.h file. +CBLAS_H_FLAT := $(BASE_INC_PATH)/$(CBLAS_H) + +# Construct the path to the helper cblas.h file that will reside one directory +# up from the installed copy of cblas.h. +HELP_CBLAS_H_PATH := $(BUILD_DIR)/$(CBLAS_H) + + +# +# --- Compiler include path definitions ---------------------------------------- +# + +# Obtain a list of header files #included inside of the bli_cntx_ref.c file. +# Due to the way that bli_cntx_ref.c uses headers and macros, paths to these +# files will be needed when compiling bli_cntx_ref.c with the monolithic header. +ifeq ($(strip $(SHARE_PATH)),.) +REF_KER_SRC := $(DIST_PATH)/$(REFKERN_DIR)/bli_cntx_ref.c +# +# NOTE: A redirect to /dev/null has been added to the grep command below because +# as of version 3.8, grep outputs warnings when encountering stray backslashes +# in regular expressions [1]. Versions older than 3.8 not only do not complain, +# but actually seem to *require* the backslash, perhaps because of the way we +# are invoking grep via GNU make's shell command. 
WHEN DEBUGGING ANYTHING +# INVOLVING THE MAKE VARIABLE BELOW, PLEASE CONSIDER TEMPORARILY REMOVING THE +# REDIRECT TO /dev/null SO THAT YOU SEE ANY MESSAGES SENT TO STANDARD ERROR. +# +# [1] https://lists.gnu.org/archive/html/info-gnu/2022-09/msg00001.html +# +REF_KER_HEADERS := $(shell $(GREP) "\#include" $(REF_KER_SRC) 2> $(DEVNULL) | sed -e "s/\#include [\"<]\([a-zA-Z0-9\_\.\/\-]*\)[\">].*/\1/g" | $(GREP) -v $(BLIS_H)) +endif + +# Match each header found above with the path to that header, and then strip +# leading, trailing, and internal whitespace. +REF_KER_H_PATHS := $(call rm-dups,$(strip \ + $(foreach header, $(REF_KER_HEADERS), \ + $(dir $(filter %/$(header), \ + $(FRAME_H99_FILES)))))) + +# Add -I to each header path so we can specify our include search paths to the +# C compiler. Then add frame/include since it's needed when compiling source +# files that #include bli_oapi_ba.h or bli_oapi_ex.h. +REF_KER_I_PATHS := $(strip $(patsubst %, -I%, $(REF_KER_H_PATHS))) +REF_KER_I_PATHS += -I$(DIST_PATH)/frame/include + +# Prefix the paths above with the base include path. +# NOTE: We no longer need every header path in the source tree since we +# now #include the monolithic/flattened blis.h instead. +CINCFLAGS := -I$(BASE_INC_PATH) $(REF_KER_I_PATHS) + +# If CBLAS is enabled, we also include the path to the cblas.h directory so +# that the compiler will be able to find cblas.h as the CBLAS source code is +# being compiled. +ifeq ($(MK_ENABLE_CBLAS),yes) +CINCFLAGS += -I$(CBLAS_H_DIRPATH) +endif + +# Obtain a list of header paths in the configured addons. Then add -I to each +# header path. +CADDONINCFLAGS := $(strip $(patsubst %, -I%, $(ADDON_HDR_DIRPATHS))) + +# Obtain a list of header paths in the configured sandbox. Then add -I to each +# header path. 
+CSANDINCFLAGS := $(strip $(patsubst %, -I%, $(SANDBOX_HDR_DIRPATHS))) + + +# +# --- BLIS configuration header definitions ------------------------------------ +# + +# These files were created by configure, but we need to define them here so we +# can remove them as part of the clean targets. +BLIS_ADDON_H := ./bli_addon.h +BLIS_CONFIG_H := ./bli_config.h + + +# +# --- Special preprocessor macro definitions ----------------------------------- +# + +# Define a C preprocessor flag that is *only* defined when BLIS is being +# compiled. (In other words, an application that #includes blis.h will not +# get this cpp macro.) +BUILD_CPPFLAGS := -DBLIS_IS_BUILDING_LIBRARY + + +# +# --- configure file location -------------------------------------------------- +# + +CONFIGURE_FILE := $(DIST_PATH)/configure + + +# end of ifndef COMMON_MK_INCLUDED conditional block +endif diff --git a/share/blis/config/a64fx/bli_kernel_defs_a64fx.h b/share/blis/config/a64fx/bli_kernel_defs_a64fx.h new file mode 100644 index 000000000..2c5c97204 --- /dev/null +++ b/share/blis/config/a64fx/bli_kernel_defs_a64fx.h @@ -0,0 +1,52 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2022, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +//#ifndef BLIS_KERNEL_DEFS_H +//#define BLIS_KERNEL_DEFS_H + + +// -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- + +#define BLIS_MR_s 32 +#define BLIS_MR_d 16 +#define BLIS_MR_c 16 +#define BLIS_MR_z 8 + +#define BLIS_NR_s 10 +#define BLIS_NR_d 10 +#define BLIS_NR_c 10 +#define BLIS_NR_z 10 + +//#endif + diff --git a/share/blis/config/a64fx/make_defs.mk b/share/blis/config/a64fx/make_defs.mk new file mode 100644 index 000000000..5cc8162ba --- /dev/null +++ b/share/blis/config/a64fx/make_defs.mk @@ -0,0 +1,82 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2014, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. 
+# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration and add it to the +# running list of configurations included by common.mk. +THIS_CONFIG := a64fx +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. +CPPROCFLAGS := -D_GNU_SOURCE -D_A64FX +CMISCFLAGS := +CPICFLAGS := -fPIC +CWARNFLAGS := + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O3 -ftree-vectorize -march=armv8-a+sve +endif + +# Flags specific to optimized kernels. +CKOPTFLAGS := $(COPTFLAGS) +CKVECFLAGS := + +# Flags specific to reference kernels. 
+CROPTFLAGS := $(CKOPTFLAGS) +ifeq ($(CC_VENDOR),gcc) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +ifeq ($(CC_VENDOR),clang) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +CRVECFLAGS := $(CKVECFLAGS) +endif +endif + +# Store all of the variables here to new variables containing the +# configuration name. +$(eval $(call store-make-defs,$(THIS_CONFIG))) + diff --git a/share/blis/config/altra/bli_kernel_defs_altra.h b/share/blis/config/altra/bli_kernel_defs_altra.h new file mode 100644 index 000000000..815c59399 --- /dev/null +++ b/share/blis/config/altra/bli_kernel_defs_altra.h @@ -0,0 +1,48 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2023, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +//#ifndef BLIS_KERNEL_DEFS_H +//#define BLIS_KERNEL_DEFS_H + + +// -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- + +#define BLIS_MR_s 8 +#define BLIS_MR_d 6 + +#define BLIS_NR_s 12 +#define BLIS_NR_d 8 + +//#endif + diff --git a/share/blis/config/altra/make_defs.mk b/share/blis/config/altra/make_defs.mk new file mode 100644 index 000000000..ef1e337db --- /dev/null +++ b/share/blis/config/altra/make_defs.mk @@ -0,0 +1,90 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2014, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration and add it to the +# running list of configurations included by common.mk. +THIS_CONFIG := altra +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. +CPPROCFLAGS := -D_GNU_SOURCE +CMISCFLAGS := +CPICFLAGS := +CWARNFLAGS := + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O2 -mcpu=neoverse-n1 +endif + +# Flags specific to optimized kernels. +CKOPTFLAGS := $(COPTFLAGS) -O3 -ftree-vectorize +ifeq ($(CC_VENDOR),gcc) +CKVECFLAGS := -mcpu=neoverse-n1 +else +ifeq ($(CC_VENDOR),clang) +CKVECFLAGS := -mcpu=neoverse-n1 +else +$(error gcc or clang is required for this configuration.) +endif +endif + +# Flags specific to reference kernels. 
+CROPTFLAGS := $(CKOPTFLAGS) +ifeq ($(CC_VENDOR),gcc) +CRVECFLAGS := $(CKVECFLAGS) +else +ifeq ($(CC_VENDOR),clang) +CRVECFLAGS := $(CKVECFLAGS) +else +CRVECFLAGS := $(CKVECFLAGS) +endif +endif + +# Store all of the variables here to new variables containing the +# configuration name. +$(eval $(call store-make-defs,$(THIS_CONFIG))) + diff --git a/share/blis/config/altramax/bli_kernel_defs_altramax.h b/share/blis/config/altramax/bli_kernel_defs_altramax.h new file mode 100644 index 000000000..815c59399 --- /dev/null +++ b/share/blis/config/altramax/bli_kernel_defs_altramax.h @@ -0,0 +1,48 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2023, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +//#ifndef BLIS_KERNEL_DEFS_H +//#define BLIS_KERNEL_DEFS_H + + +// -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- + +#define BLIS_MR_s 8 +#define BLIS_MR_d 6 + +#define BLIS_NR_s 12 +#define BLIS_NR_d 8 + +//#endif + diff --git a/share/blis/config/altramax/make_defs.mk b/share/blis/config/altramax/make_defs.mk new file mode 100644 index 000000000..35bd7de48 --- /dev/null +++ b/share/blis/config/altramax/make_defs.mk @@ -0,0 +1,90 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2014, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration and add it to the +# running list of configurations included by common.mk. +THIS_CONFIG := altramax +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. +CPPROCFLAGS := -D_GNU_SOURCE +CMISCFLAGS := +CPICFLAGS := +CWARNFLAGS := + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O2 -mcpu=neoverse-n1 +endif + +# Flags specific to optimized kernels. +CKOPTFLAGS := $(COPTFLAGS) -O3 -ftree-vectorize +ifeq ($(CC_VENDOR),gcc) +CKVECFLAGS := -mcpu=neoverse-n1 +else +ifeq ($(CC_VENDOR),clang) +CKVECFLAGS := -mcpu=neoverse-n1 +else +$(error gcc or clang is required for this configuration.) +endif +endif + +# Flags specific to reference kernels. 
+CROPTFLAGS := $(CKOPTFLAGS) +ifeq ($(CC_VENDOR),gcc) +CRVECFLAGS := $(CKVECFLAGS) +else +ifeq ($(CC_VENDOR),clang) +CRVECFLAGS := $(CKVECFLAGS) +else +CRVECFLAGS := $(CKVECFLAGS) +endif +endif + +# Store all of the variables here to new variables containing the +# configuration name. +$(eval $(call store-make-defs,$(THIS_CONFIG))) + diff --git a/share/blis/config/amd64/make_defs.mk b/share/blis/config/amd64/make_defs.mk new file mode 100644 index 000000000..bbe4d8d5f --- /dev/null +++ b/share/blis/config/amd64/make_defs.mk @@ -0,0 +1,69 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration and add it to the +# running list of configurations included by common.mk. +THIS_CONFIG := amd64 +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. +CPPROCFLAGS := +CMISCFLAGS := +CPICFLAGS := -fPIC +CWARNFLAGS := + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O2 +endif + +# Settings for reference and optimized kernels are taken from individual +# subconfiguration makefile fragments in this family. + +# Store all of the variables here to new variables containing the +# configuration name. +$(eval $(call store-make-defs,$(THIS_CONFIG))) + diff --git a/share/blis/config/amd64_legacy/make_defs.mk b/share/blis/config/amd64_legacy/make_defs.mk new file mode 100644 index 000000000..914f533ae --- /dev/null +++ b/share/blis/config/amd64_legacy/make_defs.mk @@ -0,0 +1,70 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2014, The University of Texas at Austin +# Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration and add it to the +# running list of configurations included by common.mk. +THIS_CONFIG := amd64_legacy +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. 
+CPPROCFLAGS := +CMISCFLAGS := +CPICFLAGS := -fPIC +CWARNFLAGS := + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O2 +endif + +# Settings for reference and optimized kernels are taken from individual +# subconfiguration makefile fragments in this family. + +# Store all of the variables here to new variables containing the +# configuration name. +$(eval $(call store-make-defs,$(THIS_CONFIG))) + diff --git a/share/blis/config/arm32/make_defs.mk b/share/blis/config/arm32/make_defs.mk new file mode 100644 index 000000000..ee9529638 --- /dev/null +++ b/share/blis/config/arm32/make_defs.mk @@ -0,0 +1,86 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2014, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration and add it to the +# running list of configurations included by common.mk. +THIS_CONFIG := arm32 +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. +CPPROCFLAGS := +CMISCFLAGS := -mfloat-abi=hard -mfpu=neon +CPICFLAGS := -fPIC +CWARNFLAGS := + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O2 +endif + +# Flags specific to optimized kernels. +CKOPTFLAGS := $(COPTFLAGS) -O3 +ifeq ($(CC_VENDOR),gcc) +CKVECFLAGS := -march=armv7-a +else +$(error gcc is required for this configuration.) +endif + +# Flags specific to reference kernels. +CROPTFLAGS := $(CKOPTFLAGS) +ifeq ($(CC_VENDOR),gcc) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +ifeq ($(CC_VENDOR),clang) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +CRVECFLAGS := $(CKVECFLAGS) +endif +endif + +# Store all of the variables here to new variables containing the +# configuration name. 
+$(eval $(call store-make-defs,$(THIS_CONFIG))) + diff --git a/share/blis/config/arm64/make_defs.mk b/share/blis/config/arm64/make_defs.mk new file mode 100644 index 000000000..1f8c2e84b --- /dev/null +++ b/share/blis/config/arm64/make_defs.mk @@ -0,0 +1,90 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2014, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# +# + + +# Declare the name of the current configuration and add it to the +# running list of configurations included by common.mk. +THIS_CONFIG := arm64 +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. +CPPROCFLAGS := -D_GNU_SOURCE +CMISCFLAGS := +CPICFLAGS := -fPIC +CWARNFLAGS := + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O2 +endif + +# Flags specific to optimized kernels. +CKOPTFLAGS := $(COPTFLAGS) -O3 +ifeq ($(CC_VENDOR),gcc) +CKVECFLAGS := -march=armv8-a +else +ifeq ($(CC_VENDOR),clang) +CKVECFLAGS := -march=armv8-a +else +$(error gcc or clang is required for this configuration.) +endif +endif + +# Flags specific to reference kernels. +CROPTFLAGS := $(CKOPTFLAGS) +ifeq ($(CC_VENDOR),gcc) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +ifeq ($(CC_VENDOR),clang) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +CRVECFLAGS := $(CKVECFLAGS) +endif +endif + +# Store all of the variables here to new variables containing the +# configuration name. +$(eval $(call store-make-defs,$(THIS_CONFIG))) + diff --git a/share/blis/config/armsve/bli_kernel_defs_armsve.h b/share/blis/config/armsve/bli_kernel_defs_armsve.h new file mode 100644 index 000000000..8c9c0b0dd --- /dev/null +++ b/share/blis/config/armsve/bli_kernel_defs_armsve.h @@ -0,0 +1,58 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. 
+ + Copyright (C) 2022, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +//#ifndef BLIS_KERNEL_DEFS_H +//#define BLIS_KERNEL_DEFS_H + + +// -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- + +// +// The armsve configuration handles both 256-bit and 512-bit SVE vectors, +// so it is not possible to define specific register block sizes. Thus, +// armsve can't use reference kernels! 
+// + +#define BLIS_MR_s -1 +#define BLIS_MR_d -1 +#define BLIS_MR_c -1 +#define BLIS_MR_z -1 + +#define BLIS_NR_s 10 +#define BLIS_NR_d 10 +#define BLIS_NR_c 10 +#define BLIS_NR_z 10 + +//#endif + diff --git a/share/blis/config/armsve/make_defs.mk b/share/blis/config/armsve/make_defs.mk new file mode 100644 index 000000000..340b52f31 --- /dev/null +++ b/share/blis/config/armsve/make_defs.mk @@ -0,0 +1,82 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2014, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration and add it to the +# running list of configurations included by common.mk. +THIS_CONFIG := armsve +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. +CPPROCFLAGS := -D_GNU_SOURCE +CMISCFLAGS := +CPICFLAGS := -fPIC +CWARNFLAGS := + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O3 -ftree-vectorize -march=armv8-a+sve +endif + +# Flags specific to optimized kernels. +CKOPTFLAGS := $(COPTFLAGS) +CKVECFLAGS := + +# Flags specific to reference kernels. +CROPTFLAGS := $(CKOPTFLAGS) +ifeq ($(CC_VENDOR),gcc) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +ifeq ($(CC_VENDOR),clang) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +CRVECFLAGS := $(CKVECFLAGS) +endif +endif + +# Store all of the variables here to new variables containing the +# configuration name. 
+$(eval $(call store-make-defs,$(THIS_CONFIG))) + diff --git a/share/blis/config/bgq/bli_kernel_defs_bgq.h b/share/blis/config/bgq/bli_kernel_defs_bgq.h new file mode 100644 index 000000000..bd3962e45 --- /dev/null +++ b/share/blis/config/bgq/bli_kernel_defs_bgq.h @@ -0,0 +1,48 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2022, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +//#ifndef BLIS_KERNEL_DEFS_H +//#define BLIS_KERNEL_DEFS_H + + +// -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- + +#define BLIS_MR_d 8 +#define BLIS_MR_z 4 + +#define BLIS_NR_d 8 +#define BLIS_NR_z 4 + +//#endif + diff --git a/share/blis/config/bgq/make_defs.mk b/share/blis/config/bgq/make_defs.mk new file mode 100644 index 000000000..fa4479956 --- /dev/null +++ b/share/blis/config/bgq/make_defs.mk @@ -0,0 +1,102 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2014, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration and add it to the +# running list of configurations included by common.mk. +THIS_CONFIG := bgq +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + +#ifeq ($(CC),) +#CC := /bgsys/drivers/ppcfloor/comm/gcc.legacy/bin/mpixlc_r +#CC_VENDOR := ibm +#endif + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. +CPPROCFLAGS := -I/bgsys/drivers/ppcfloor -I/bgsys/drivers/ppcfloor/spi/include/kernel/cnk +ifeq ($(CC_VENDOR),ibm) +CMISCFLAGS := -qthreaded -qsmp=omp -qasm=gcc -qkeyword=asm # -qreport -qsource -qlistopt -qlist +else ifeq ($(CC_VENDOR),clang) +CMISCFLAGS := -fopenmp +else +$(error xlc or bgclang is required for this configuration.) +endif +CPICFLAGS := -fPIC +CWARNFLAGS := -w + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O2 +endif + +# Flags specific to optimized kernels. +CKOPTFLAGS := $(COPTFLAGS) -O3 +ifeq ($(CC_VENDOR),ibm) +CKVECFLAGS := -qarch=qp -qtune=qp -qsimd=auto -qhot=level=1 -qprefetch -qunroll=yes -qnoipa +endif + +# Flags specific to reference kernels. 
+CROPTFLAGS := $(CKOPTFLAGS) +ifeq ($(CC_VENDOR),gcc) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +ifeq ($(CC_VENDOR),clang) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +CRVECFLAGS := $(CKVECFLAGS) +endif +endif + +# Override the default value for LDFLAGS. +ifeq ($(CC_VENDOR),ibm) +LDFLAGS := -L/bgsys/drivers/ppcfloor/spi/lib -lSPI -lSPI_cnk -qthreaded -qsmp=omp +else ifeq ($(CC_VENDOR),clang) +LDFLAGS := -L/bgsys/drivers/ppcfloor/spi/lib -lSPI -lSPI_cnk -fopenmp +endif + +# Store all of the variables here to new variables containing the +# configuration name. +$(eval $(call store-make-defs,$(THIS_CONFIG))) + diff --git a/share/blis/config/bulldozer/bli_kernel_defs_bulldozer.h b/share/blis/config/bulldozer/bli_kernel_defs_bulldozer.h new file mode 100644 index 000000000..ea1e58e66 --- /dev/null +++ b/share/blis/config/bulldozer/bli_kernel_defs_bulldozer.h @@ -0,0 +1,52 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2022, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +//#ifndef BLIS_KERNEL_DEFS_H +//#define BLIS_KERNEL_DEFS_H + + +// -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- + +#define BLIS_MR_s 8 +#define BLIS_MR_d 4 +#define BLIS_MR_c 8 +#define BLIS_MR_z 4 + +#define BLIS_NR_s 8 +#define BLIS_NR_d 6 +#define BLIS_NR_c 4 +#define BLIS_NR_z 4 + +//#endif + diff --git a/share/blis/config/bulldozer/make_defs.mk b/share/blis/config/bulldozer/make_defs.mk new file mode 100644 index 000000000..e3e208862 --- /dev/null +++ b/share/blis/config/bulldozer/make_defs.mk @@ -0,0 +1,90 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2014, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. 
+# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration and add it to the +# running list of configurations included by common.mk. +THIS_CONFIG := bulldozer +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. +CPPROCFLAGS := +CMISCFLAGS := +CPICFLAGS := -fPIC +CWARNFLAGS := + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O2 +endif + +# Flags specific to optimized kernels. 
+CKOPTFLAGS := $(COPTFLAGS) -O3 +ifeq ($(CC_VENDOR),gcc) +CKVECFLAGS := -mfpmath=sse -mavx -mfma4 -march=bdver1 -mno-tbm -mno-xop -mno-lwp +else +ifeq ($(CC_VENDOR),clang) +CKVECFLAGS := -mfpmath=sse -mavx -mfma4 -march=bdver1 -mno-tbm -mno-xop -mno-lwp +else +$(error gcc or clang are required for this configuration.) +endif +endif + +# Flags specific to reference kernels. +CROPTFLAGS := $(CKOPTFLAGS) +ifeq ($(CC_VENDOR),gcc) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +ifeq ($(CC_VENDOR),clang) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +CRVECFLAGS := $(CKVECFLAGS) +endif +endif + +# Store all of the variables here to new variables containing the +# configuration name. +$(eval $(call store-make-defs,$(THIS_CONFIG))) + diff --git a/share/blis/config/cortexa15/bli_kernel_defs_cortexa15.h b/share/blis/config/cortexa15/bli_kernel_defs_cortexa15.h new file mode 100644 index 000000000..9c413f7f8 --- /dev/null +++ b/share/blis/config/cortexa15/bli_kernel_defs_cortexa15.h @@ -0,0 +1,48 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2022, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +//#ifndef BLIS_KERNEL_DEFS_H +//#define BLIS_KERNEL_DEFS_H + + +// -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- + +#define BLIS_MR_s 4 +#define BLIS_MR_d 4 + +#define BLIS_NR_s 4 +#define BLIS_NR_d 4 + +//#endif + diff --git a/share/blis/config/cortexa15/make_defs.mk b/share/blis/config/cortexa15/make_defs.mk new file mode 100644 index 000000000..3a9a83b39 --- /dev/null +++ b/share/blis/config/cortexa15/make_defs.mk @@ -0,0 +1,86 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2014, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. 
+# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration and add it to the +# running list of configurations included by common.mk. +THIS_CONFIG := cortexa15 +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. +CPPROCFLAGS := +CMISCFLAGS := -mfloat-abi=hard -mfpu=neon +CPICFLAGS := -fPIC +CWARNFLAGS := + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O2 +endif + +# Flags specific to optimized kernels. +CKOPTFLAGS := $(COPTFLAGS) -O3 +ifeq ($(CC_VENDOR),gcc) +CKVECFLAGS := -mcpu=cortex-a15 +else +$(error gcc is required for this configuration.) +endif + +# Flags specific to reference kernels. 
+CROPTFLAGS := $(CKOPTFLAGS) +ifeq ($(CC_VENDOR),gcc) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +ifeq ($(CC_VENDOR),clang) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +CRVECFLAGS := $(CKVECFLAGS) +endif +endif + +# Store all of the variables here to new variables containing the +# configuration name. +$(eval $(call store-make-defs,$(THIS_CONFIG))) + diff --git a/share/blis/config/cortexa53/bli_kernel_defs_cortexa53.h b/share/blis/config/cortexa53/bli_kernel_defs_cortexa53.h new file mode 100644 index 000000000..60292099c --- /dev/null +++ b/share/blis/config/cortexa53/bli_kernel_defs_cortexa53.h @@ -0,0 +1,48 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2022, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +//#ifndef BLIS_KERNEL_DEFS_H +//#define BLIS_KERNEL_DEFS_H + + +// -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- + +#define BLIS_MR_s 8 +#define BLIS_MR_d 6 + +#define BLIS_NR_s 12 +#define BLIS_NR_d 8 + +//#endif + diff --git a/share/blis/config/cortexa53/make_defs.mk b/share/blis/config/cortexa53/make_defs.mk new file mode 100644 index 000000000..6036ea55a --- /dev/null +++ b/share/blis/config/cortexa53/make_defs.mk @@ -0,0 +1,90 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2014, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration and add it to the +# running list of configurations included by common.mk. +THIS_CONFIG := cortexa53 +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. +CPPROCFLAGS := -D_GNU_SOURCE +CMISCFLAGS := +CPICFLAGS := -fPIC +CWARNFLAGS := + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O2 -mcpu=cortex-a53 +endif + +# Flags specific to optimized kernels. +CKOPTFLAGS := $(COPTFLAGS) -O3 -ftree-vectorize +ifeq ($(CC_VENDOR),gcc) +CKVECFLAGS := -mcpu=cortex-a53 +else +ifeq ($(CC_VENDOR),clang) +CKVECFLAGS := -mcpu=cortex-a53 +else +$(error gcc or clang is required for this configuration.) +endif +endif + +# Flags specific to reference kernels. 
+CROPTFLAGS := $(CKOPTFLAGS) -O3 +ifeq ($(CC_VENDOR),gcc) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +ifeq ($(CC_VENDOR),clang) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +CRVECFLAGS := $(CKVECFLAGS) +endif +endif + +# Store all of the variables here to new variables containing the +# configuration name. +$(eval $(call store-make-defs,$(THIS_CONFIG))) + diff --git a/share/blis/config/cortexa57/bli_kernel_defs_cortexa57.h b/share/blis/config/cortexa57/bli_kernel_defs_cortexa57.h new file mode 100644 index 000000000..60292099c --- /dev/null +++ b/share/blis/config/cortexa57/bli_kernel_defs_cortexa57.h @@ -0,0 +1,48 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2022, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +//#ifndef BLIS_KERNEL_DEFS_H +//#define BLIS_KERNEL_DEFS_H + + +// -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- + +#define BLIS_MR_s 8 +#define BLIS_MR_d 6 + +#define BLIS_NR_s 12 +#define BLIS_NR_d 8 + +//#endif + diff --git a/share/blis/config/cortexa57/make_defs.mk b/share/blis/config/cortexa57/make_defs.mk new file mode 100644 index 000000000..d84f8538a --- /dev/null +++ b/share/blis/config/cortexa57/make_defs.mk @@ -0,0 +1,90 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2014, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration and add it to the +# running list of configurations included by common.mk. +THIS_CONFIG := cortexa57 +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. +CPPROCFLAGS := -D_GNU_SOURCE +CMISCFLAGS := +CPICFLAGS := -fPIC +CWARNFLAGS := + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O2 -mcpu=cortex-a57 +endif + +# Flags specific to optimized kernels. +CKOPTFLAGS := $(COPTFLAGS) -O3 -ftree-vectorize +ifeq ($(CC_VENDOR),gcc) +CKVECFLAGS := -mcpu=cortex-a57 +else +ifeq ($(CC_VENDOR),clang) +CKVECFLAGS := -mcpu=cortex-a57 +else +$(error gcc or clang is required for this configuration.) +endif +endif + +# Flags specific to reference kernels. 
+CROPTFLAGS := $(CKOPTFLAGS) +ifeq ($(CC_VENDOR),gcc) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +ifeq ($(CC_VENDOR),clang) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +CRVECFLAGS := $(CKVECFLAGS) +endif +endif + +# Store all of the variables here to new variables containing the +# configuration name. +$(eval $(call store-make-defs,$(THIS_CONFIG))) + diff --git a/share/blis/config/cortexa9/bli_kernel_defs_cortexa9.h b/share/blis/config/cortexa9/bli_kernel_defs_cortexa9.h new file mode 100644 index 000000000..9c413f7f8 --- /dev/null +++ b/share/blis/config/cortexa9/bli_kernel_defs_cortexa9.h @@ -0,0 +1,48 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2022, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +//#ifndef BLIS_KERNEL_DEFS_H +//#define BLIS_KERNEL_DEFS_H + + +// -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- + +#define BLIS_MR_s 4 +#define BLIS_MR_d 4 + +#define BLIS_NR_s 4 +#define BLIS_NR_d 4 + +//#endif + diff --git a/share/blis/config/cortexa9/make_defs.mk b/share/blis/config/cortexa9/make_defs.mk new file mode 100644 index 000000000..f5f19e530 --- /dev/null +++ b/share/blis/config/cortexa9/make_defs.mk @@ -0,0 +1,86 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2014, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration and add it to the +# running list of configurations included by common.mk. +THIS_CONFIG := cortexa9 +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. +CPPROCFLAGS := +CMISCFLAGS := -mfloat-abi=hard -mfpu=neon +CPICFLAGS := -fPIC +CWARNFLAGS := + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O2 +endif + +# Flags specific to optimized kernels. +CKOPTFLAGS := $(COPTFLAGS) -O3 +ifeq ($(CC_VENDOR),gcc) +CKVECFLAGS := -mcpu=cortex-a9 +else +$(error gcc is required for this configuration.) +endif + +# Flags specific to reference kernels. 
+CROPTFLAGS := $(CKOPTFLAGS) +ifeq ($(CC_VENDOR),gcc) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +ifeq ($(CC_VENDOR),clang) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +CRVECFLAGS := $(CKVECFLAGS) +endif +endif + +# Store all of the variables here to new variables containing the +# configuration name. +$(eval $(call store-make-defs,$(THIS_CONFIG))) + diff --git a/share/blis/config/excavator/bli_kernel_defs_excavator.h b/share/blis/config/excavator/bli_kernel_defs_excavator.h new file mode 100644 index 000000000..df4a8c411 --- /dev/null +++ b/share/blis/config/excavator/bli_kernel_defs_excavator.h @@ -0,0 +1,52 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2022, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +//#ifndef BLIS_KERNEL_DEFS_H +//#define BLIS_KERNEL_DEFS_H + + +// -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- + +#define BLIS_MR_s 16 +#define BLIS_MR_d 8 +#define BLIS_MR_c 4 +#define BLIS_MR_z 2 + +#define BLIS_NR_s 3 +#define BLIS_NR_d 3 +#define BLIS_NR_c 2 +#define BLIS_NR_z 2 + +//#endif + diff --git a/share/blis/config/excavator/make_defs.mk b/share/blis/config/excavator/make_defs.mk new file mode 100644 index 000000000..7977806b2 --- /dev/null +++ b/share/blis/config/excavator/make_defs.mk @@ -0,0 +1,90 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2014, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration and add it to the +# running list of configurations included by common.mk. +THIS_CONFIG := excavator +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. +CPPROCFLAGS := +CMISCFLAGS := +CPICFLAGS := -fPIC +CWARNFLAGS := + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O2 +endif + +# Flags specific to optimized kernels. +CKOPTFLAGS := $(COPTFLAGS) -O3 +ifeq ($(CC_VENDOR),gcc) +CKVECFLAGS := -mfpmath=sse -mavx -mfma -march=bdver4 -mno-fma4 -mno-tbm -mno-xop -mno-lwp +else +ifeq ($(CC_VENDOR),clang) +CKVECFLAGS := -mfpmath=sse -mavx -mfma -march=bdver4 -mno-fma4 -mno-tbm -mno-xop -mno-lwp +else +$(error gcc or clang are required for this configuration.) +endif +endif + +# Flags specific to reference kernels. 
+CROPTFLAGS := $(CKOPTFLAGS) +ifeq ($(CC_VENDOR),gcc) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +ifeq ($(CC_VENDOR),clang) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +CRVECFLAGS := $(CKVECFLAGS) +endif +endif + +# Store all of the variables here to new variables containing the +# configuration name. +$(eval $(call store-make-defs,$(THIS_CONFIG))) + diff --git a/share/blis/config/firestorm/bli_kernel_defs_firestorm.h b/share/blis/config/firestorm/bli_kernel_defs_firestorm.h new file mode 100644 index 000000000..60292099c --- /dev/null +++ b/share/blis/config/firestorm/bli_kernel_defs_firestorm.h @@ -0,0 +1,48 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2022, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +//#ifndef BLIS_KERNEL_DEFS_H +//#define BLIS_KERNEL_DEFS_H + + +// -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- + +#define BLIS_MR_s 8 +#define BLIS_MR_d 6 + +#define BLIS_NR_s 12 +#define BLIS_NR_d 8 + +//#endif + diff --git a/share/blis/config/firestorm/make_defs.mk b/share/blis/config/firestorm/make_defs.mk new file mode 100644 index 000000000..2353e0040 --- /dev/null +++ b/share/blis/config/firestorm/make_defs.mk @@ -0,0 +1,82 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2014, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration and add it to the +# running list of configurations included by common.mk. +THIS_CONFIG := firestorm +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. +CPPROCFLAGS := -D_GNU_SOURCE +CMISCFLAGS := +CPICFLAGS := -fPIC +CWARNFLAGS := + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O2 -march=armv8-a +endif + +# Flags specific to optimized kernels. +CKOPTFLAGS := $(COPTFLAGS) -O3 -ftree-vectorize +CKVECFLAGS := -march=armv8-a + +# Flags specific to reference kernels. +CROPTFLAGS := $(CKOPTFLAGS) +ifeq ($(CC_VENDOR),gcc) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +ifeq ($(CC_VENDOR),clang) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +CRVECFLAGS := $(CKVECFLAGS) +endif +endif + +# Store all of the variables here to new variables containing the +# configuration name. 
+$(eval $(call store-make-defs,$(THIS_CONFIG))) + diff --git a/share/blis/config/generic/bli_kernel_defs_generic.h b/share/blis/config/generic/bli_kernel_defs_generic.h new file mode 100644 index 000000000..db2f32947 --- /dev/null +++ b/share/blis/config/generic/bli_kernel_defs_generic.h @@ -0,0 +1,42 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2022, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +//#ifndef BLIS_KERNEL_DEFS_H +//#define BLIS_KERNEL_DEFS_H + + +// -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- + +//#endif + diff --git a/share/blis/config/generic/make_defs.mk b/share/blis/config/generic/make_defs.mk new file mode 100644 index 000000000..cbe4fb86f --- /dev/null +++ b/share/blis/config/generic/make_defs.mk @@ -0,0 +1,98 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2014, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration and add it to the +# running list of configurations included by common.mk. +THIS_CONFIG := generic +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. +CPPROCFLAGS := +CMISCFLAGS := +CPICFLAGS := -fPIC +CWARNFLAGS := + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O2 +endif + +# Flags specific to optimized kernels. +CKOPTFLAGS := $(COPTFLAGS) -O3 +ifeq ($(CC_VENDOR),gcc) +CKVECFLAGS := +else +ifeq ($(CC_VENDOR),icc) +CKVECFLAGS := +else +ifeq ($(CC_VENDOR),clang) +CKVECFLAGS := +else +ifeq ($(CC_VENDOR),nvc) +CKVECFLAGS := +else +$(error gcc, icc, nvc, or clang is required for this configuration.) +endif +endif +endif +endif + +# Flags specific to reference kernels. +CROPTFLAGS := $(CKOPTFLAGS) +ifeq ($(CC_VENDOR),gcc) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +ifeq ($(CC_VENDOR),clang) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +CRVECFLAGS := $(CKVECFLAGS) +endif +endif + +# Store all of the variables here to new variables containing the +# configuration name. 
+$(eval $(call store-make-defs,$(THIS_CONFIG))) + diff --git a/share/blis/config/haswell/bli_kernel_defs_haswell.h b/share/blis/config/haswell/bli_kernel_defs_haswell.h new file mode 100644 index 000000000..c5bc8d63f --- /dev/null +++ b/share/blis/config/haswell/bli_kernel_defs_haswell.h @@ -0,0 +1,52 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2022, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +//#ifndef BLIS_KERNEL_DEFS_H +//#define BLIS_KERNEL_DEFS_H + + +// -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- + +#define BLIS_MR_s 6 +#define BLIS_MR_d 6 +#define BLIS_MR_c 3 +#define BLIS_MR_z 3 + +#define BLIS_NR_s 16 +#define BLIS_NR_d 8 +#define BLIS_NR_c 8 +#define BLIS_NR_z 4 + +//#endif + diff --git a/share/blis/config/haswell/make_defs.mk b/share/blis/config/haswell/make_defs.mk new file mode 100644 index 000000000..6f7b5b49a --- /dev/null +++ b/share/blis/config/haswell/make_defs.mk @@ -0,0 +1,100 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2014, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration and add it to the +# running list of configurations included by common.mk. +THIS_CONFIG := haswell +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. +CPPROCFLAGS := +CMISCFLAGS := +CPICFLAGS := -fPIC +CWARNFLAGS := + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O2 +endif + +# Flags specific to optimized kernels. +# NOTE: The -fomit-frame-pointer option is needed for some kernels because +# they make explicit use of the rbp register. +CKOPTFLAGS := $(COPTFLAGS) -O3 -fomit-frame-pointer +ifeq ($(CC_VENDOR),gcc) +CKVECFLAGS := -mavx2 -mfma -mfpmath=sse -march=haswell +ifeq ($(GCC_OT_4_9_0),yes) +# If gcc is older than 4.9.0, we must use a different label for -march. +CKVECFLAGS := -mavx2 -mfma -mfpmath=sse -march=core-avx2 +endif +else +ifeq ($(CC_VENDOR),icc) +CKVECFLAGS := -xCORE-AVX2 +else +ifeq ($(CC_VENDOR),clang) +CKVECFLAGS := -mavx2 -mfma -mfpmath=sse -march=haswell +else +$(error gcc, icc, or clang is required for this configuration.) +endif +endif +endif + +# Flags specific to reference kernels. 
+CROPTFLAGS := $(CKOPTFLAGS) +ifeq ($(CC_VENDOR),gcc) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +ifeq ($(CC_VENDOR),clang) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +CRVECFLAGS := $(CKVECFLAGS) +endif +endif + +# Store all of the variables here to new variables containing the +# configuration name. +$(eval $(call store-make-defs,$(THIS_CONFIG))) + diff --git a/share/blis/config/intel64/make_defs.mk b/share/blis/config/intel64/make_defs.mk new file mode 100644 index 000000000..3f62cef57 --- /dev/null +++ b/share/blis/config/intel64/make_defs.mk @@ -0,0 +1,94 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2014, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration and add it to the +# running list of configurations included by common.mk. +THIS_CONFIG := intel64 +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. +CPPROCFLAGS := +CMISCFLAGS := +CPICFLAGS := -fPIC +CWARNFLAGS := + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O2 +endif + +# Flags specific to optimized kernels. +CKOPTFLAGS := $(COPTFLAGS) -O3 +ifeq ($(CC_VENDOR),gcc) +CKVECFLAGS := -mssse3 -mfpmath=sse -march=core2 +else +ifeq ($(CC_VENDOR),icc) +CKVECFLAGS := -xSSSE3 +else +ifeq ($(CC_VENDOR),clang) +CKVECFLAGS := -mssse3 -mfpmath=sse -march=core2 +else +$(error gcc, icc, or clang is required for this configuration.) +endif +endif +endif + +# Flags specific to reference kernels. +CROPTFLAGS := $(CKOPTFLAGS) +ifeq ($(CC_VENDOR),gcc) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +ifeq ($(CC_VENDOR),clang) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +CRVECFLAGS := $(CKVECFLAGS) +endif +endif + +# Store all of the variables here to new variables containing the +# configuration name. 
+$(eval $(call store-make-defs,$(THIS_CONFIG))) + diff --git a/share/blis/config/knl/bli_kernel_defs_knl.h b/share/blis/config/knl/bli_kernel_defs_knl.h new file mode 100644 index 000000000..ce514bb21 --- /dev/null +++ b/share/blis/config/knl/bli_kernel_defs_knl.h @@ -0,0 +1,48 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2022, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +//#ifndef BLIS_KERNEL_DEFS_H +//#define BLIS_KERNEL_DEFS_H + + +// -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- + +#define BLIS_MR_s 24 +#define BLIS_MR_d 24 + +#define BLIS_NR_s 16 +#define BLIS_NR_d 8 + +//#endif + diff --git a/share/blis/config/knl/make_defs.mk b/share/blis/config/knl/make_defs.mk new file mode 100644 index 000000000..5458745b9 --- /dev/null +++ b/share/blis/config/knl/make_defs.mk @@ -0,0 +1,118 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2014, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration and add it to the +# running list of configurations included by common.mk. +THIS_CONFIG := knl +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. +CPPROCFLAGS := +CMISCFLAGS := +CPICFLAGS := -fPIC +CWARNFLAGS := + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O2 +endif + +ifeq ($(DEBUG_TYPE),sde) +# Unconditionally disable use of libmemkind in Intel SDE. +# Note: The BLIS_DISABLE_MEMKIND macro definition will override +# (undefine) the BLIS_ENABLE_MEMKIND macro definition. +CPPROCFLAGS += -DBLIS_DISABLE_MEMKIND +# This value is normally set by configure and communicated to make via +# config.mk, however, the make_defs.mk files (this file) get included +# after config.mk, so this definition will override that earlier +# definition. +MK_ENABLE_MEMKIND := no +endif + +# Flags specific to optimized kernels. 
+CKOPTFLAGS := $(COPTFLAGS) -O3 +ifeq ($(CC_VENDOR),gcc) +CKVECFLAGS := -mavx512f -mavx512pf -mfpmath=sse -march=knl +else +ifeq ($(CC_VENDOR),icc) +CKVECFLAGS := -xMIC-AVX512 +else +ifeq ($(CC_VENDOR),clang) +CKVECFLAGS := -mavx512f -mavx512pf -mfpmath=sse -march=knl +else +$(error gcc, icc, or clang is required for this configuration.) +endif +endif +endif + +# The assembler on OS X won't recognize AVX512 without help. +ifneq ($(CC_VENDOR),icc) +ifeq ($(OS_NAME),Darwin) +CKVECFLAGS += -Wa,-march=knl +endif +endif + +# Flags specific to reference kernels. +# Note: We use AVX2 for reference kernels instead of AVX-512. +CROPTFLAGS := $(CKOPTFLAGS) +ifeq ($(CC_VENDOR),gcc) +CRVECFLAGS := -march=knl -mno-avx512f -mno-avx512pf -mno-avx512er -mno-avx512cd -funsafe-math-optimizations -ffp-contract=fast +else +ifeq ($(CC_VENDOR),icc) +CRVECFLAGS := -xMIC-AVX512 +else +ifeq ($(CC_VENDOR),clang) +CRVECFLAGS := -march=knl -mno-avx512f -mno-avx512pf -mno-avx512er -mno-avx512cd -funsafe-math-optimizations -ffp-contract=fast +else +$(error gcc, icc, or clang is required for this configuration.) +endif +endif +endif + +# Store all of the variables here to new variables containing the +# configuration name. +$(eval $(call store-make-defs,$(THIS_CONFIG))) + diff --git a/share/blis/config/penryn/bli_kernel_defs_penryn.h b/share/blis/config/penryn/bli_kernel_defs_penryn.h new file mode 100644 index 000000000..f1e483646 --- /dev/null +++ b/share/blis/config/penryn/bli_kernel_defs_penryn.h @@ -0,0 +1,48 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2022, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +//#ifndef BLIS_KERNEL_DEFS_H +//#define BLIS_KERNEL_DEFS_H + + +// -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- + +#define BLIS_MR_s 8 +#define BLIS_MR_d 4 + +#define BLIS_NR_s 4 +#define BLIS_NR_d 4 + +//#endif + diff --git a/share/blis/config/penryn/make_defs.mk b/share/blis/config/penryn/make_defs.mk new file mode 100644 index 000000000..d070b7f1a --- /dev/null +++ b/share/blis/config/penryn/make_defs.mk @@ -0,0 +1,94 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. 
+# +# Copyright (C) 2014, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration and add it to the +# running list of configurations included by common.mk. +THIS_CONFIG := penryn +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. 
+CPPROCFLAGS := +CMISCFLAGS := +CPICFLAGS := -fPIC +CWARNFLAGS := + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O2 +endif + +# Flags specific to optimized kernels. +CKOPTFLAGS := $(COPTFLAGS) -O3 +ifeq ($(CC_VENDOR),gcc) +CKVECFLAGS := -mssse3 -mfpmath=sse -march=core2 +else +ifeq ($(CC_VENDOR),icc) +CKVECFLAGS := -xSSSE3 +else +ifeq ($(CC_VENDOR),clang) +CKVECFLAGS := -mssse3 -mfpmath=sse -march=core2 +else +$(error gcc, icc, or clang is required for this configuration.) +endif +endif +endif + +# Flags specific to reference kernels. +CROPTFLAGS := $(CKOPTFLAGS) +ifeq ($(CC_VENDOR),gcc) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +ifeq ($(CC_VENDOR),clang) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +CRVECFLAGS := $(CKVECFLAGS) +endif +endif + +# Store all of the variables here to new variables containing the +# configuration name. +$(eval $(call store-make-defs,$(THIS_CONFIG))) + diff --git a/share/blis/config/piledriver/bli_kernel_defs_piledriver.h b/share/blis/config/piledriver/bli_kernel_defs_piledriver.h new file mode 100644 index 000000000..df4a8c411 --- /dev/null +++ b/share/blis/config/piledriver/bli_kernel_defs_piledriver.h @@ -0,0 +1,52 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2022, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +//#ifndef BLIS_KERNEL_DEFS_H +//#define BLIS_KERNEL_DEFS_H + + +// -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- + +#define BLIS_MR_s 16 +#define BLIS_MR_d 8 +#define BLIS_MR_c 4 +#define BLIS_MR_z 2 + +#define BLIS_NR_s 3 +#define BLIS_NR_d 3 +#define BLIS_NR_c 2 +#define BLIS_NR_z 2 + +//#endif + diff --git a/share/blis/config/piledriver/make_defs.mk b/share/blis/config/piledriver/make_defs.mk new file mode 100644 index 000000000..56b7d0fc5 --- /dev/null +++ b/share/blis/config/piledriver/make_defs.mk @@ -0,0 +1,90 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2014, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. 
+# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration and add it to the +# running list of configurations included by common.mk. +THIS_CONFIG := piledriver +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. +CPPROCFLAGS := +CMISCFLAGS := +CPICFLAGS := -fPIC +CWARNFLAGS := + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O2 +endif + +# Flags specific to optimized kernels. 
+CKOPTFLAGS := $(COPTFLAGS) -O3 +ifeq ($(CC_VENDOR),gcc) +CKVECFLAGS := -mfpmath=sse -mavx -mfma -march=bdver2 -mno-fma4 -mno-tbm -mno-xop -mno-lwp +else +ifeq ($(CC_VENDOR),clang) +CKVECFLAGS := -mfpmath=sse -mavx -mfma -march=bdver2 -mno-fma4 -mno-tbm -mno-xop -mno-lwp +else +$(error gcc or clang are required for this configuration.) +endif +endif + +# Flags specific to reference kernels. +CROPTFLAGS := $(CKOPTFLAGS) +ifeq ($(CC_VENDOR),gcc) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +ifeq ($(CC_VENDOR),clang) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +CRVECFLAGS := $(CKVECFLAGS) +endif +endif + +# Store all of the variables here to new variables containing the +# configuration name. +$(eval $(call store-make-defs,$(THIS_CONFIG))) + diff --git a/share/blis/config/power/make_defs.mk b/share/blis/config/power/make_defs.mk new file mode 100644 index 000000000..8350a0a5c --- /dev/null +++ b/share/blis/config/power/make_defs.mk @@ -0,0 +1,82 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2014, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration and add it to the +# running list of configurations included by common.mk. +THIS_CONFIG := power +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. +CPPROCFLAGS := +CMISCFLAGS := +CPICFLAGS := -fPIC +CWARNFLAGS := + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O2 +endif + +# Flags specific to optimized kernels. +CKOPTFLAGS := $(COPTFLAGS) -O3 +CKVECFLAGS := + +# Flags specific to reference kernels. +CROPTFLAGS := $(CKOPTFLAGS) +ifeq ($(CC_VENDOR),gcc) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +ifeq ($(CC_VENDOR),clang) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +CRVECFLAGS := $(CKVECFLAGS) +endif +endif + +# Store all of the variables here to new variables containing the +# configuration name. 
+$(eval $(call store-make-defs,$(THIS_CONFIG))) + diff --git a/share/blis/config/power10/bli_kernel_defs_power10.h b/share/blis/config/power10/bli_kernel_defs_power10.h new file mode 100644 index 000000000..9b47a77c0 --- /dev/null +++ b/share/blis/config/power10/bli_kernel_defs_power10.h @@ -0,0 +1,49 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2022, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +//#ifndef BLIS_KERNEL_DEFS_H +//#define BLIS_KERNEL_DEFS_H + + +// -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- + +#define BLIS_MR_s 8 +#define BLIS_MR_d 8 + +#define BLIS_NR_s 16 +#define BLIS_NR_d 8 + + +//#endif + diff --git a/share/blis/config/power10/make_defs.mk b/share/blis/config/power10/make_defs.mk new file mode 100644 index 000000000..191a3e42a --- /dev/null +++ b/share/blis/config/power10/make_defs.mk @@ -0,0 +1,83 @@ + +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2019, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration and add it to the +# running list of configurations included by common.mk. +THIS_CONFIG := power10 +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. +CPPROCFLAGS := +CMISCFLAGS := +CPICFLAGS := -fPIC +CWARNFLAGS := + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O2 +endif + +# Flags specific to optimized kernels. +CKOPTFLAGS := $(COPTFLAGS) -O3 +ifeq ($(CC_VENDOR),gcc) +CKVECFLAGS := -mcpu=power10 -mtune=power10 +else +ifeq ($(CC_VENDOR),clang) +CKVECFLAGS := -mcpu=power10 -mtune=power10 +else +$(info $(CC_VENDOR)) +$(error gcc, clang is required for this configuration.) +endif +endif + +# Flags specific to reference kernels. +CROPTFLAGS := $(CKOPTFLAGS) +CRVECFLAGS := $(CKVECFLAGS) + +# Store all of the variables here to new variables containing the +# configuration name. 
+$(eval $(call store-make-defs,$(THIS_CONFIG))) diff --git a/share/blis/config/power9/bli_kernel_defs_power9.h b/share/blis/config/power9/bli_kernel_defs_power9.h new file mode 100644 index 000000000..debfeac5f --- /dev/null +++ b/share/blis/config/power9/bli_kernel_defs_power9.h @@ -0,0 +1,49 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2022, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +//#ifndef BLIS_KERNEL_DEFS_H +//#define BLIS_KERNEL_DEFS_H + + +// -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- + +#define BLIS_MR_d 12 + +#define BLIS_NR_d 6 + +#define BLIS_BBN_s 4 +#define BLIS_BBN_d 2 + +//#endif + diff --git a/share/blis/config/power9/make_defs.mk b/share/blis/config/power9/make_defs.mk new file mode 100644 index 000000000..9f604a607 --- /dev/null +++ b/share/blis/config/power9/make_defs.mk @@ -0,0 +1,84 @@ + +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2019, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration and add it to the +# running list of configurations included by common.mk. +THIS_CONFIG := power9 +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. +CPPROCFLAGS := +CMISCFLAGS := +CPICFLAGS := -fPIC +CWARNFLAGS := + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O2 +endif + +# Flags specific to optimized kernels. +CKOPTFLAGS := $(COPTFLAGS) -O3 +ifeq ($(CC_VENDOR),gcc) +CKVECFLAGS := -mcpu=power9 -mtune=power9 -DXLC=0 +else +ifeq ($(CC_VENDOR),IBM) +CKVECFLAGS := -qarch=pwr9 -qtune=pwr9 -DXLC=1 +else +$(info $(CC_VENDOR)) +$(error gcc/xlc is required for this configuration.) +endif +endif + +# Flags specific to reference kernels. +CROPTFLAGS := $(CKOPTFLAGS) +CRVECFLAGS := $(CKVECFLAGS) + +# Store all of the variables here to new variables containing the +# configuration name. 
+$(eval $(call store-make-defs,$(THIS_CONFIG))) + diff --git a/share/blis/config/rv32i/bli_kernel_defs_rv32i.h b/share/blis/config/rv32i/bli_kernel_defs_rv32i.h new file mode 100644 index 000000000..fe51f998d --- /dev/null +++ b/share/blis/config/rv32i/bli_kernel_defs_rv32i.h @@ -0,0 +1,43 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2022, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +//#ifndef BLIS_KERNEL_DEFS_H +//#define BLIS_KERNEL_DEFS_H + + +// -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- + +// Fall through to generic sizes + +//#endif diff --git a/share/blis/config/rv32i/make_defs.mk b/share/blis/config/rv32i/make_defs.mk new file mode 100644 index 000000000..21128717f --- /dev/null +++ b/share/blis/config/rv32i/make_defs.mk @@ -0,0 +1,102 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2014, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration and add it to the +# running list of configurations included by common.mk. +THIS_CONFIG := rv32i +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. +CPPROCFLAGS := -DRISCV_SIZE=32 + +RISCV_ARCH := $(shell $(CC) -E build/detect/riscv/bli_riscv_detect_arch.h | grep '^[^\#]') +RISCV_ABI := $(shell $(CC) -E build/detect/riscv/bli_riscv_detect_abi.h | grep '^[^\#]') + +ifeq (,$(findstring 32,$(RISCV_ARCH))) +$(error The RISC-V compiler architecture $(RISCV_ARCH) is not compatible with $(THIS_CONFIG)) +else ifeq (,$(findstring 32,$(RISCV_ABI))) +$(error The RISC-V compiler ABI $(RISCV_ABI) is not compatible with $(THIS_CONFIG)) +endif + +CMISCFLAGS := -march=$(RISCV_ARCH) -mabi=$(RISCV_ABI) +CPICFLAGS := -fPIC +CWARNFLAGS := -Wall -Wno-unused-function -Wfatal-errors + +# In case the A extension is not available +LDFLAGS += -latomic + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O2 +endif + +# Flags specific to optimized kernels. 
+CKOPTFLAGS := $(COPTFLAGS) -O3 +ifeq ($(CC_VENDOR),gcc) +CKVECFLAGS := +else +ifeq ($(CC_VENDOR),clang) +CKVECFLAGS := +else +$(error gcc or clang is required for this configuration.) +endif +endif + +# Flags specific to reference kernels. +CROPTFLAGS := $(CKOPTFLAGS) +ifeq ($(CC_VENDOR),gcc) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +ifeq ($(CC_VENDOR),clang) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +CRVECFLAGS := $(CKVECFLAGS) +endif +endif + +# Store all of the variables here to new variables containing the +# configuration name. +$(eval $(call store-make-defs,$(THIS_CONFIG))) diff --git a/share/blis/config/rv32iv/bli_kernel_defs_rv32iv.h b/share/blis/config/rv32iv/bli_kernel_defs_rv32iv.h new file mode 100644 index 000000000..b17989208 --- /dev/null +++ b/share/blis/config/rv32iv/bli_kernel_defs_rv32iv.h @@ -0,0 +1,43 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2022, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +//#ifndef BLIS_KERNEL_DEFS_H +//#define BLIS_KERNEL_DEFS_H + + +// -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- + +// No block size macros are defined in this header; presumably the generic defaults apply (TODO confirm). + +//#endif diff --git a/share/blis/config/rv32iv/make_defs.mk b/share/blis/config/rv32iv/make_defs.mk new file mode 100644 index 000000000..9daaee3d6 --- /dev/null +++ b/share/blis/config/rv32iv/make_defs.mk @@ -0,0 +1,104 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2014, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution.
+# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration and add it to the +# running list of configurations included by common.mk. +THIS_CONFIG := rv32iv +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. 
+CPPROCFLAGS := -DRISCV_SIZE=32 +# Preprocess the detect headers with the C compiler and keep only non-empty output lines that do not begin with '#'; the results feed -march/-mabi below. +RISCV_ARCH := $(shell $(CC) -DFORCE_RISCV_VECTOR -E build/detect/riscv/bli_riscv_detect_arch.h | grep '^[^\#]') +RISCV_ABI := $(shell $(CC) -DFORCE_RISCV_VECTOR -E build/detect/riscv/bli_riscv_detect_abi.h | grep '^[^\#]') +# Abort unless both the detected architecture string and the detected ABI string contain "32". +ifeq (,$(findstring 32,$(RISCV_ARCH))) +$(error The RISC-V compiler architecture $(RISCV_ARCH) is not compatible with $(THIS_CONFIG)) +else ifeq (,$(findstring 32,$(RISCV_ABI))) +$(error The RISC-V compiler ABI $(RISCV_ABI) is not compatible with $(THIS_CONFIG)) +endif + +CMISCFLAGS := -march=$(RISCV_ARCH) -mabi=$(RISCV_ABI) +CPICFLAGS := -fPIC +CWARNFLAGS := -Wall -Wno-unused-function -Wfatal-errors + +# In case the A extension is not available +LDFLAGS += -latomic + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif +# NOTE(review): both branches below set -O0, so DEBUG_TYPE does not affect COPTFLAGS here; confirm the non-noopt branch is intentionally unoptimized. +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O0 +endif + +# Flags specific to optimized kernels. +CKOPTFLAGS := $(COPTFLAGS) -O3 +ifeq ($(CC_VENDOR),gcc) +CKVECFLAGS := +else +ifeq ($(CC_VENDOR),clang) +CKVECFLAGS := +else +$(error gcc or clang is required for this configuration.) +endif +endif + +# Flags specific to reference kernels. +CROPTFLAGS := $(CKOPTFLAGS) +ifeq ($(CC_VENDOR),gcc) +# Lower compiler optimization to -O1. At -O3, gcc version 12.0.1 20220505 +# computes offsets for the matrix ab in the ref gemm kernel incorrectly. +CRVECFLAGS := $(CKVECFLAGS) -O1 +else +ifeq ($(CC_VENDOR),clang) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +CRVECFLAGS := $(CKVECFLAGS) +endif +endif + +# Store all of the variables here to new variables containing the +# configuration name.
+$(eval $(call store-make-defs,$(THIS_CONFIG))) diff --git a/share/blis/config/rv64i/bli_kernel_defs_rv64i.h b/share/blis/config/rv64i/bli_kernel_defs_rv64i.h new file mode 100644 index 000000000..fe51f998d --- /dev/null +++ b/share/blis/config/rv64i/bli_kernel_defs_rv64i.h @@ -0,0 +1,43 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2022, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +//#ifndef BLIS_KERNEL_DEFS_H +//#define BLIS_KERNEL_DEFS_H + + +// -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- + +// Fall through to generic sizes + +//#endif diff --git a/share/blis/config/rv64i/make_defs.mk b/share/blis/config/rv64i/make_defs.mk new file mode 100644 index 000000000..7c055f012 --- /dev/null +++ b/share/blis/config/rv64i/make_defs.mk @@ -0,0 +1,102 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2014, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration and add it to the +# running list of configurations included by common.mk. +THIS_CONFIG := rv64i +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. +CPPROCFLAGS := -DRISCV_SIZE=64 + +RISCV_ARCH := $(shell $(CC) -E build/detect/riscv/bli_riscv_detect_arch.h | grep '^[^\#]') +RISCV_ABI := $(shell $(CC) -E build/detect/riscv/bli_riscv_detect_abi.h | grep '^[^\#]') + +ifeq (,$(findstring 64,$(RISCV_ARCH))) +$(error The RISC-V compiler architecture $(RISCV_ARCH) is not compatible with $(THIS_CONFIG)) +else ifeq (,$(findstring 64,$(RISCV_ABI))) +$(error The RISC-V compiler ABI $(RISCV_ABI) is not compatible with $(THIS_CONFIG)) +endif + +CMISCFLAGS := -march=$(RISCV_ARCH) -mabi=$(RISCV_ABI) +CPICFLAGS := -fPIC +CWARNFLAGS := -Wall -Wno-unused-function -Wfatal-errors + +# In case the A extension is not available +LDFLAGS += -latomic + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O2 +endif + +# Flags specific to optimized kernels. 
+CKOPTFLAGS := $(COPTFLAGS) -O3 +ifeq ($(CC_VENDOR),gcc) +CKVECFLAGS := +else +ifeq ($(CC_VENDOR),clang) +CKVECFLAGS := +else +$(error gcc or clang is required for this configuration.) +endif +endif + +# Flags specific to reference kernels. +CROPTFLAGS := $(CKOPTFLAGS) +ifeq ($(CC_VENDOR),gcc) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +ifeq ($(CC_VENDOR),clang) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +CRVECFLAGS := $(CKVECFLAGS) +endif +endif + +# Store all of the variables here to new variables containing the +# configuration name. +$(eval $(call store-make-defs,$(THIS_CONFIG))) diff --git a/share/blis/config/rv64iv/bli_kernel_defs_rv64iv.h b/share/blis/config/rv64iv/bli_kernel_defs_rv64iv.h new file mode 100644 index 000000000..18ca4030e --- /dev/null +++ b/share/blis/config/rv64iv/bli_kernel_defs_rv64iv.h @@ -0,0 +1,42 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2022, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +//#ifndef BLIS_KERNEL_DEFS_H +//#define BLIS_KERNEL_DEFS_H + + +// -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- + + +//#endif diff --git a/share/blis/config/rv64iv/make_defs.mk b/share/blis/config/rv64iv/make_defs.mk new file mode 100644 index 000000000..9ec5a889a --- /dev/null +++ b/share/blis/config/rv64iv/make_defs.mk @@ -0,0 +1,103 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2014, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. 
+# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration and add it to the +# running list of configurations included by common.mk. +THIS_CONFIG := rv64iv +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. 
+CPPROCFLAGS := -DRISCV_SIZE=64 + +RISCV_ARCH := $(shell $(CC) -DFORCE_RISCV_VECTOR -E build/detect/riscv/bli_riscv_detect_arch.h | grep '^[^\#]') +RISCV_ABI := $(shell $(CC) -DFORCE_RISCV_VECTOR -E build/detect/riscv/bli_riscv_detect_abi.h | grep '^[^\#]') + +ifeq (,$(findstring 64,$(RISCV_ARCH))) +$(error The RISC-V compiler architecture $(RISCV_ARCH) is not compatible with $(THIS_CONFIG)) +else ifeq (,$(findstring 64,$(RISCV_ABI))) +$(error The RISC-V compiler ABI $(RISCV_ABI) is not compatible with $(THIS_CONFIG)) +endif + +CMISCFLAGS := -march=$(RISCV_ARCH) -mabi=$(RISCV_ABI) +CPICFLAGS := -fPIC +CWARNFLAGS := -Wall -Wno-unused-function -Wfatal-errors + +# In case the A extension is not available +LDFLAGS += -latomic + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O2 -ftree-vectorize +endif + +# Flags specific to optimized kernels. +CKOPTFLAGS := $(COPTFLAGS) -O3 +ifeq ($(CC_VENDOR),gcc) +CKVECFLAGS := +else +ifeq ($(CC_VENDOR),clang) +CKVECFLAGS := +else +$(error gcc or clang is required for this configuration.) +endif +endif + +# Flags specific to reference kernels. +CROPTFLAGS := $(CKOPTFLAGS) +ifeq ($(CC_VENDOR),gcc) +# Lower compiler optimization. cinvscalv fails at -O1 +CRVECFLAGS := $(CKVECFLAGS) -O0 +else +ifeq ($(CC_VENDOR),clang) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +CRVECFLAGS := $(CKVECFLAGS) +endif +endif + +# Store all of the variables here to new variables containing the +# configuration name. +$(eval $(call store-make-defs,$(THIS_CONFIG))) diff --git a/share/blis/config/rvv_sg2042/bli_kernel_defs_rvv_sg2042.h b/share/blis/config/rvv_sg2042/bli_kernel_defs_rvv_sg2042.h new file mode 100644 index 000000000..18ca4030e --- /dev/null +++ b/share/blis/config/rvv_sg2042/bli_kernel_defs_rvv_sg2042.h @@ -0,0 +1,42 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. 
+ + Copyright (C) 2022, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +//#ifndef BLIS_KERNEL_DEFS_H +//#define BLIS_KERNEL_DEFS_H + + +// -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- + + +//#endif diff --git a/share/blis/config/rvv_sg2042/make_defs.mk b/share/blis/config/rvv_sg2042/make_defs.mk new file mode 100644 index 000000000..0244f38ea --- /dev/null +++ b/share/blis/config/rvv_sg2042/make_defs.mk @@ -0,0 +1,105 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2014, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration and add it to the +# running list of configurations included by common.mk. +THIS_CONFIG := rvv_sg2042 +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. +CPPROCFLAGS := -DRISCV_SIZE=64 + +#RISCV_ARCH := $(shell $(CC) -DFORCE_RISCV_VECTOR -E build/detect/riscv/bli_riscv_detect_arch.h | grep '^[^\#]') +#RISCV_ABI := $(shell $(CC) -DFORCE_RISCV_VECTOR -E build/detect/riscv/bli_riscv_detect_abi.h | grep '^[^\#]') +RISCV_ARCH := rv64gc_zfh_xtheadvector +RISCV_ABI := lp64d + +ifeq (,$(findstring 64,$(RISCV_ARCH))) +$(error The RISC-V compiler architecture $(RISCV_ARCH) is not compatible with $(THIS_CONFIG)) +else ifeq (,$(findstring 64,$(RISCV_ABI))) +$(error The RISC-V compiler ABI $(RISCV_ABI) is not compatible with $(THIS_CONFIG)) +endif + +CMISCFLAGS := -march=$(RISCV_ARCH) -mabi=$(RISCV_ABI) +CPICFLAGS := -fPIC +CWARNFLAGS := -Wall -Wno-unused-function -Wfatal-errors + +# In case the A extension is not available +LDFLAGS += -latomic + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O2 -ftree-vectorize +endif + +# Flags specific to optimized kernels. 
+CKOPTFLAGS := $(COPTFLAGS) -O3 +ifeq ($(CC_VENDOR),gcc) +CKVECFLAGS := +else +ifeq ($(CC_VENDOR),clang) +CKVECFLAGS := +else +$(error gcc or clang is required for this configuration.) +endif +endif + +# Flags specific to reference kernels. +CROPTFLAGS := $(CKOPTFLAGS) +ifeq ($(CC_VENDOR),gcc) +# Lower compiler optimization. cinvscalv fails at -O1 +CRVECFLAGS := $(CKVECFLAGS) -O0 +else +ifeq ($(CC_VENDOR),clang) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +CRVECFLAGS := $(CKVECFLAGS) +endif +endif + +# Store all of the variables here to new variables containing the +# configuration name. +$(eval $(call store-make-defs,$(THIS_CONFIG))) diff --git a/share/blis/config/sandybridge/bli_kernel_defs_sandybridge.h b/share/blis/config/sandybridge/bli_kernel_defs_sandybridge.h new file mode 100644 index 000000000..dc1b843f6 --- /dev/null +++ b/share/blis/config/sandybridge/bli_kernel_defs_sandybridge.h @@ -0,0 +1,52 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2022, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +//#ifndef BLIS_KERNEL_DEFS_H +//#define BLIS_KERNEL_DEFS_H + + +// -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- + +#define BLIS_MR_s 8 +#define BLIS_MR_d 8 +#define BLIS_MR_c 8 +#define BLIS_MR_z 4 + +#define BLIS_NR_s 8 +#define BLIS_NR_d 4 +#define BLIS_NR_c 4 +#define BLIS_NR_z 4 + +//#endif + diff --git a/share/blis/config/sandybridge/make_defs.mk b/share/blis/config/sandybridge/make_defs.mk new file mode 100644 index 000000000..6047787cd --- /dev/null +++ b/share/blis/config/sandybridge/make_defs.mk @@ -0,0 +1,98 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2014, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. 
+# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration and add it to the +# running list of configurations included by common.mk. +THIS_CONFIG := sandybridge +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. +CPPROCFLAGS := +CMISCFLAGS := +CPICFLAGS := -fPIC +CWARNFLAGS := + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O2 +endif + +# Flags specific to optimized kernels. 
+CKOPTFLAGS := $(COPTFLAGS) -O3 +ifeq ($(CC_VENDOR),gcc) +CKVECFLAGS := -mavx -mfpmath=sse -march=sandybridge +ifeq ($(GCC_OT_4_9_0),yes) +# If gcc is older than 4.9.0, we must use a different label for -march. +CKVECFLAGS := -mavx -mfpmath=sse -march=corei7-avx +endif +else +ifeq ($(CC_VENDOR),icc) +CKVECFLAGS := -xAVX +else +ifeq ($(CC_VENDOR),clang) +CKVECFLAGS := -mavx -mfpmath=sse -march=sandybridge +else +$(error gcc, icc, or clang is required for this configuration.) +endif +endif +endif + +# Flags specific to reference kernels. +CROPTFLAGS := $(CKOPTFLAGS) +ifeq ($(CC_VENDOR),gcc) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +ifeq ($(CC_VENDOR),clang) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +CRVECFLAGS := $(CKVECFLAGS) +endif +endif + +# Store all of the variables here to new variables containing the +# configuration name. +$(eval $(call store-make-defs,$(THIS_CONFIG))) + diff --git a/share/blis/config/sifive_rvv/bli_kernel_defs_sifive_rvv.h b/share/blis/config/sifive_rvv/bli_kernel_defs_sifive_rvv.h new file mode 100644 index 000000000..33543db50 --- /dev/null +++ b/share/blis/config/sifive_rvv/bli_kernel_defs_sifive_rvv.h @@ -0,0 +1,55 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2024, SiFive, Inc. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +//#ifndef BLIS_KERNEL_DEFS_H +//#define BLIS_KERNEL_DEFS_H + + +// -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- +#define BLIS_MR_s 7 +#define BLIS_MR_d 7 +#define BLIS_MR_c 6 +#define BLIS_MR_z 6 + +#define BLIS_PACKMR_s 8 +#define BLIS_PACKMR_d 8 +#define BLIS_PACKMR_c 8 +#define BLIS_PACKMR_z 8 + +#define BLIS_NR_s -1 +#define BLIS_NR_d -1 +#define BLIS_NR_c -1 +#define BLIS_NR_z -1 +//#endif + diff --git a/share/blis/config/sifive_rvv/make_defs.mk b/share/blis/config/sifive_rvv/make_defs.mk new file mode 100644 index 000000000..a4b3675e1 --- /dev/null +++ b/share/blis/config/sifive_rvv/make_defs.mk @@ -0,0 +1,80 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2024, SiFive, Inc. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration and add it to the +# running list of configurations included by common.mk. +THIS_CONFIG := sifive_rvv +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. 
+CMISCFLAGS_SIFIVE := -mcmodel=medany -march=rv64gcv_zba_zbb -mabi=lp64d +CMISCFLAGS_SIFIVE_OTHER := +CPPROCFLAGS := +CMISCFLAGS := $(CMISCFLAGS_SIFIVE) $(CMISCFLAGS_SIFIVE_OTHER) \ + -fdata-sections -ffunction-sections \ + -fdiagnostics-color=always -fno-rtti -fno-exceptions +CPICFLAGS := -fPIC +CWARNFLAGS := -Wall -Wextra -Wno-unused-function -Wno-unused-parameter \ + -Wno-sign-compare -Wno-unused-variable + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O3 +endif + +# Flags specific to optimized kernels. +CKOPTFLAGS := $(COPTFLAGS) +CKVECFLAGS := + +# Flags specific to reference kernels. +CROPTFLAGS := $(CKOPTFLAGS) +CRVECFLAGS := $(CKVECFLAGS) + +# Store all of the variables here to new variables containing the +# configuration name. +$(eval $(call store-make-defs,$(THIS_CONFIG))) + diff --git a/share/blis/config/sifive_x280/bli_kernel_defs_sifive_x280.h b/share/blis/config/sifive_x280/bli_kernel_defs_sifive_x280.h new file mode 100644 index 000000000..bb6865a66 --- /dev/null +++ b/share/blis/config/sifive_x280/bli_kernel_defs_sifive_x280.h @@ -0,0 +1,55 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2023, SiFive, Inc. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +//#ifndef BLIS_KERNEL_DEFS_H +//#define BLIS_KERNEL_DEFS_H + + +// -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- +#define BLIS_MR_s 7 +#define BLIS_MR_d 7 +#define BLIS_MR_c 6 +#define BLIS_MR_z 6 + +#define BLIS_PACKMR_s 8 +#define BLIS_PACKMR_d 8 +#define BLIS_PACKMR_c 8 +#define BLIS_PACKMR_z 8 + +#define BLIS_NR_s 64 +#define BLIS_NR_d 32 +#define BLIS_NR_c 32 +#define BLIS_NR_z 16 +//#endif + diff --git a/share/blis/config/sifive_x280/make_defs.mk b/share/blis/config/sifive_x280/make_defs.mk new file mode 100644 index 000000000..5f19e4e44 --- /dev/null +++ b/share/blis/config/sifive_x280/make_defs.mk @@ -0,0 +1,80 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2023, SiFive, Inc. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. 
+# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration and add it to the +# running list of configurations included by common.mk. +THIS_CONFIG := sifive_x280 +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. 
+CMISCFLAGS_SIFIVE := -mcmodel=medany -march=rv64gcv_zba_zbb_zvl512b -mabi=lp64d +CMISCFLAGS_SIFIVE_OTHER := +CPPROCFLAGS := +CMISCFLAGS := $(CMISCFLAGS_SIFIVE) $(CMISCFLAGS_SIFIVE_OTHER) \ + -fdata-sections -ffunction-sections \ + -fdiagnostics-color=always -fno-rtti -fno-exceptions +CPICFLAGS := -fPIC +CWARNFLAGS := -Wall -Wextra -Wno-unused-function -Wno-unused-parameter \ + -Wno-sign-compare -Wno-unused-variable + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O3 +endif + +# Flags specific to optimized kernels. +CKOPTFLAGS := $(COPTFLAGS) +CKVECFLAGS := + +# Flags specific to reference kernels. +CROPTFLAGS := $(CKOPTFLAGS) +CRVECFLAGS := $(CKVECFLAGS) + +# Store all of the variables here to new variables containing the +# configuration name. +$(eval $(call store-make-defs,$(THIS_CONFIG))) + diff --git a/share/blis/config/skx/bli_kernel_defs_skx.h b/share/blis/config/skx/bli_kernel_defs_skx.h new file mode 100644 index 000000000..2aaf477ad --- /dev/null +++ b/share/blis/config/skx/bli_kernel_defs_skx.h @@ -0,0 +1,48 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2022, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +//#ifndef BLIS_KERNEL_DEFS_H +//#define BLIS_KERNEL_DEFS_H +// NOTE(review): the include guard (#ifndef/#define above, #endif at the bottom) is commented out; confirm this is intended. + +// -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- + +#define BLIS_MR_s 32 +#define BLIS_MR_d 16 + +#define BLIS_NR_s 12 +#define BLIS_NR_d 14 + +//#endif + diff --git a/share/blis/config/skx/make_defs.mk b/share/blis/config/skx/make_defs.mk new file mode 100644 index 000000000..589e73dda --- /dev/null +++ b/share/blis/config/skx/make_defs.mk @@ -0,0 +1,126 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2014, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution.
+# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration. The statement that would add +# it to the running list of configurations included by common.mk is commented out. +THIS_CONFIG := skx +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. +CPPROCFLAGS := +CMISCFLAGS := +CPICFLAGS := -fPIC +CWARNFLAGS := + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O2 +endif + +# Flags specific to optimized kernels. -O3 is listed after the COPTFLAGS value; for GCC-compatible drivers the last -O option given is the effective one. +# NOTE: The -fomit-frame-pointer option is needed for some kernels because +# they make explicit use of the rbp register.
+CKOPTFLAGS := $(COPTFLAGS) -O3 -fomit-frame-pointer +ifeq ($(CC_VENDOR),gcc) +CKVECFLAGS := -mavx512f -mavx512dq -mavx512bw -mavx512vl -mfpmath=sse -march=skylake-avx512 +else +ifeq ($(CC_VENDOR),icc) +CKVECFLAGS := -xCORE-AVX512 +else +ifeq ($(CC_VENDOR),clang) +# NOTE: We have to use -march=haswell on Windows because apparently AVX512 +# uses an alternate calling convention where xmm registers are not callee-saved +# on the stack. When this is mixed with framework code compiled for general +# x86_64 mode then chaos ensues (e.g. #514). +ifeq ($(IS_WIN),yes) +CKVECFLAGS := -mavx512f -mavx512dq -mavx512bw -mavx512vl -mfpmath=sse -march=haswell +else +CKVECFLAGS := -mavx512f -mavx512dq -mavx512bw -mavx512vl -mfpmath=sse -march=skylake-avx512 +endif +else +$(error gcc, icc, or clang is required for this configuration.) +endif +endif +endif + +# The assembler on OS X won't recognize AVX512 without help, so on Darwin (for compilers other than icc) -march=skylake-avx512 is passed to it via -Wa. +ifneq ($(CC_VENDOR),icc) +ifeq ($(OS_NAME),Darwin) +CKVECFLAGS += -Wa,-march=skylake-avx512 +endif +endif + +# Flags specific to reference kernels. +# Note: We use AVX2 for reference kernels because, as Jeff Hammond says, +# reference kernel code "is not going to achieve high enough SIMD utilization +# to overcome the AVX-512 frequency drop". (Issue #187) For gcc (and clang off Windows) this is expressed as -march=skylake-avx512 with the avx512f/vl/bw/dq/cd feature sets disabled; icc uses -xCORE-AVX2. +CROPTFLAGS := $(CKOPTFLAGS) +ifeq ($(CC_VENDOR),gcc) +CRVECFLAGS := -march=skylake-avx512 -mno-avx512f -mno-avx512vl -mno-avx512bw -mno-avx512dq -mno-avx512cd -funsafe-math-optimizations -ffp-contract=fast +else +ifeq ($(CC_VENDOR),icc) +CRVECFLAGS := -xCORE-AVX2 +else +ifeq ($(CC_VENDOR),clang) +# NOTE: We have to use -march=haswell on Windows because apparently AVX512 +# uses an alternate calling convention where xmm registers are not callee-saved +# on the stack. When this is mixed with framework code compiled for general +# x86_64 mode then chaos ensues (e.g. #514).
+ifeq ($(IS_WIN),yes) +CRVECFLAGS := -march=haswell -funsafe-math-optimizations -ffp-contract=fast +else +CRVECFLAGS := -march=skylake-avx512 -mno-avx512f -mno-avx512vl -mno-avx512bw -mno-avx512dq -mno-avx512cd -funsafe-math-optimizations -ffp-contract=fast +endif +else +$(error gcc, icc, or clang is required for this configuration.) +endif +endif +endif + +# Store all of the variables set here into new variables whose names contain +# the configuration name (via the store-make-defs macro called below). +$(eval $(call store-make-defs,$(THIS_CONFIG))) + diff --git a/share/blis/config/steamroller/bli_kernel_defs_steamroller.h b/share/blis/config/steamroller/bli_kernel_defs_steamroller.h new file mode 100644 index 000000000..df4a8c411 --- /dev/null +++ b/share/blis/config/steamroller/bli_kernel_defs_steamroller.h @@ -0,0 +1,52 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2022, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +//#ifndef BLIS_KERNEL_DEFS_H +//#define BLIS_KERNEL_DEFS_H +// NOTE(review): the include guard (#ifndef/#define above, #endif at the bottom) is commented out; confirm this is intended. + +// -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- + +#define BLIS_MR_s 16 +#define BLIS_MR_d 8 +#define BLIS_MR_c 4 +#define BLIS_MR_z 2 + +#define BLIS_NR_s 3 +#define BLIS_NR_d 3 +#define BLIS_NR_c 2 +#define BLIS_NR_z 2 + +//#endif + diff --git a/share/blis/config/steamroller/make_defs.mk b/share/blis/config/steamroller/make_defs.mk new file mode 100644 index 000000000..122472c85 --- /dev/null +++ b/share/blis/config/steamroller/make_defs.mk @@ -0,0 +1,90 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2014, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission.
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration. The statement that would add +# it to the running list of configurations included by common.mk is commented out. +THIS_CONFIG := steamroller +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. +CPPROCFLAGS := +CMISCFLAGS := +CPICFLAGS := -fPIC +CWARNFLAGS := + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O2 +endif + +# Flags specific to optimized kernels. -O3 is listed after the COPTFLAGS value; for GCC-compatible drivers the last -O option given is the effective one. +CKOPTFLAGS := $(COPTFLAGS) -O3 +ifeq ($(CC_VENDOR),gcc) +CKVECFLAGS := -mfpmath=sse -mavx -mfma -march=bdver3 -mno-fma4 -mno-tbm -mno-xop -mno-lwp +else +ifeq ($(CC_VENDOR),clang) +CKVECFLAGS := -mfpmath=sse -mavx -mfma -march=bdver3 -mno-fma4 -mno-tbm -mno-xop -mno-lwp +else +$(error gcc or clang are required for this configuration.) +endif +endif + +# Flags specific to reference kernels. NOTE(review): the final else branch below looks unreachable, since the error call above already stops make for compilers other than gcc/clang; confirm.
+CROPTFLAGS := $(CKOPTFLAGS) +ifeq ($(CC_VENDOR),gcc) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +ifeq ($(CC_VENDOR),clang) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +CRVECFLAGS := $(CKVECFLAGS) +endif +endif + +# Store all of the variables set here into new variables whose names contain +# the configuration name (via the store-make-defs macro called below). +$(eval $(call store-make-defs,$(THIS_CONFIG))) + diff --git a/share/blis/config/thunderx2/bli_kernel_defs_thunderx2.h b/share/blis/config/thunderx2/bli_kernel_defs_thunderx2.h new file mode 100644 index 000000000..60292099c --- /dev/null +++ b/share/blis/config/thunderx2/bli_kernel_defs_thunderx2.h @@ -0,0 +1,48 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2022, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +//#ifndef BLIS_KERNEL_DEFS_H +//#define BLIS_KERNEL_DEFS_H +// NOTE(review): the include guard (#ifndef/#define above, #endif at the bottom) is commented out; confirm this is intended. + +// -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- + +#define BLIS_MR_s 8 +#define BLIS_MR_d 6 + +#define BLIS_NR_s 12 +#define BLIS_NR_d 8 + +//#endif + diff --git a/share/blis/config/thunderx2/make_defs.mk b/share/blis/config/thunderx2/make_defs.mk new file mode 100644 index 000000000..fd7df2eee --- /dev/null +++ b/share/blis/config/thunderx2/make_defs.mk @@ -0,0 +1,90 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2014, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission.
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration. The statement that would add +# it to the running list of configurations included by common.mk is commented out. +THIS_CONFIG := thunderx2 +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. +CPPROCFLAGS := -D_GNU_SOURCE +CMISCFLAGS := +CPICFLAGS := -fPIC +CWARNFLAGS := + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O2 -mcpu=thunderx2t99 +endif + +# Flags specific to optimized kernels. -O3 is listed after the COPTFLAGS value; for GCC-compatible drivers the last -O option given is the effective one. +CKOPTFLAGS := $(COPTFLAGS) -O3 -ftree-vectorize +ifeq ($(CC_VENDOR),gcc) +CKVECFLAGS := -mcpu=thunderx2t99 +else +ifeq ($(CC_VENDOR),clang) +CKVECFLAGS := -mcpu=thunderx2t99 +else +$(error gcc or clang is required for this configuration.) +endif +endif + +# Flags specific to reference kernels. NOTE(review): the final else branch below looks unreachable, since the error call above already stops make for compilers other than gcc/clang; confirm.
+CROPTFLAGS := $(CKOPTFLAGS) +ifeq ($(CC_VENDOR),gcc) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +ifeq ($(CC_VENDOR),clang) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +CRVECFLAGS := $(CKVECFLAGS) +endif +endif + +# Store all of the variables set here into new variables whose names contain +# the configuration name (via the store-make-defs macro called below). +$(eval $(call store-make-defs,$(THIS_CONFIG))) + diff --git a/share/blis/config/x86_64/make_defs.mk b/share/blis/config/x86_64/make_defs.mk new file mode 100644 index 000000000..3c912370e --- /dev/null +++ b/share/blis/config/x86_64/make_defs.mk @@ -0,0 +1,94 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2014, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration. The statement that would add +# it to the running list of configurations included by common.mk is commented out. +THIS_CONFIG := x86_64 +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. +CPPROCFLAGS := +CMISCFLAGS := +CPICFLAGS := -fPIC +CWARNFLAGS := + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O2 +endif + +# Flags specific to optimized kernels. -O3 is listed after the COPTFLAGS value; for GCC-compatible drivers the last -O option given is the effective one. +CKOPTFLAGS := $(COPTFLAGS) -O3 +ifeq ($(CC_VENDOR),gcc) +CKVECFLAGS := -mssse3 -mfpmath=sse -march=core2 +else +ifeq ($(CC_VENDOR),icc) +CKVECFLAGS := -xSSE3 +else +ifeq ($(CC_VENDOR),clang) +CKVECFLAGS := -mssse3 -mfpmath=sse -march=core2 +else +$(error gcc, icc, or clang is required for this configuration.) +endif +endif +endif + +# Flags specific to reference kernels. The final else branch is the icc case (the only other vendor accepted above); it reuses CKVECFLAGS unchanged. +CROPTFLAGS := $(CKOPTFLAGS) +ifeq ($(CC_VENDOR),gcc) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +ifeq ($(CC_VENDOR),clang) +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +else +CRVECFLAGS := $(CKVECFLAGS) +endif +endif + +# Store all of the variables set here into new variables whose names contain +# the configuration name (via the store-make-defs macro called below).
+$(eval $(call store-make-defs,$(THIS_CONFIG))) + diff --git a/share/blis/config/zen/bli_kernel_defs_zen.h b/share/blis/config/zen/bli_kernel_defs_zen.h new file mode 100644 index 000000000..c5bc8d63f --- /dev/null +++ b/share/blis/config/zen/bli_kernel_defs_zen.h @@ -0,0 +1,52 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2022, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ +*/ + +//#ifndef BLIS_KERNEL_DEFS_H +//#define BLIS_KERNEL_DEFS_H +// NOTE(review): the include guard (#ifndef/#define above, #endif at the bottom) is commented out; confirm this is intended. + +// -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- + +#define BLIS_MR_s 6 +#define BLIS_MR_d 6 +#define BLIS_MR_c 3 +#define BLIS_MR_z 3 + +#define BLIS_NR_s 16 +#define BLIS_NR_d 8 +#define BLIS_NR_c 8 +#define BLIS_NR_z 4 + +//#endif + diff --git a/share/blis/config/zen/make_defs.mk b/share/blis/config/zen/make_defs.mk new file mode 100644 index 000000000..389a313b6 --- /dev/null +++ b/share/blis/config/zen/make_defs.mk @@ -0,0 +1,93 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration. The statement that would add +# it to the running list of configurations included by common.mk is commented out. +THIS_CONFIG := zen +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. +CPPROCFLAGS := +CMISCFLAGS := +CPICFLAGS := -fPIC +CWARNFLAGS := + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O2 -fomit-frame-pointer +endif + +# Flags specific to optimized and reference kernels. CVECFLAGS_VER (the compiler-dependent -march selection below) is appended to both CKVECFLAGS and CRVECFLAGS. +# NOTE: The -fomit-frame-pointer option is needed for some kernels because +# they make explicit use of the rbp register. NOTE(review): with DEBUG_TYPE=noopt, COPTFLAGS is just -O0, so the option is not passed explicitly to kernels; confirm -O3 alone suffices. +CKOPTFLAGS := $(COPTFLAGS) -O3 +CROPTFLAGS := $(CKOPTFLAGS) +CKVECFLAGS := -mavx2 -mfma -mfpmath=sse +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +ifeq ($(CC_VENDOR),gcc) + ifeq ($(GCC_OT_6_1_0),yes) # gcc versions older than 6.1.
+ CVECFLAGS_VER := -march=bdver4 -mno-fma4 -mno-tbm -mno-xop -mno-lwp + else + CVECFLAGS_VER := -march=znver1 -mno-avx256-split-unaligned-store + endif +else +ifeq ($(CC_VENDOR),clang) + CVECFLAGS_VER := -march=znver1 +else +ifeq ($(CC_VENDOR),aocc) + CVECFLAGS_VER := -march=znver1 -mllvm -disable-licm-vrp +else + $(error gcc, clang, or aocc is required for this configuration.) +endif +endif +endif +CKVECFLAGS += $(CVECFLAGS_VER) +CRVECFLAGS += $(CVECFLAGS_VER) + +# Store all of the variables set here into new variables whose names contain +# the configuration name (via the store-make-defs macro called below). +$(eval $(call store-make-defs,$(THIS_CONFIG))) + diff --git a/share/blis/config/zen2/bli_kernel_defs_zen2.h b/share/blis/config/zen2/bli_kernel_defs_zen2.h new file mode 100644 index 000000000..c5bc8d63f --- /dev/null +++ b/share/blis/config/zen2/bli_kernel_defs_zen2.h @@ -0,0 +1,52 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2022, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +//#ifndef BLIS_KERNEL_DEFS_H +//#define BLIS_KERNEL_DEFS_H +// NOTE(review): the include guard (#ifndef/#define above, #endif at the bottom) is commented out; confirm this is intended. + +// -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- + +#define BLIS_MR_s 6 +#define BLIS_MR_d 6 +#define BLIS_MR_c 3 +#define BLIS_MR_z 3 + +#define BLIS_NR_s 16 +#define BLIS_NR_d 8 +#define BLIS_NR_c 8 +#define BLIS_NR_z 4 + +//#endif + diff --git a/share/blis/config/zen2/make_defs.mk b/share/blis/config/zen2/make_defs.mk new file mode 100644 index 000000000..1eebf7fa7 --- /dev/null +++ b/share/blis/config/zen2/make_defs.mk @@ -0,0 +1,105 @@ +# +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission.
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# + + +# Declare the name of the current configuration. The statement that would add +# it to the running list of configurations included by common.mk is commented out. +THIS_CONFIG := zen2 +#CONFIGS_INCL += $(THIS_CONFIG) + +# +# --- Determine the C compiler and related flags --- +# + +# NOTE: The build system will append these variables with various +# general-purpose/configuration-agnostic flags in common.mk. You +# may specify additional flags here as needed. +CPPROCFLAGS := +CMISCFLAGS := +CPICFLAGS := -fPIC +CWARNFLAGS := + +ifneq ($(DEBUG_TYPE),off) +CDBGFLAGS := -g +endif + +ifeq ($(DEBUG_TYPE),noopt) +COPTFLAGS := -O0 +else +COPTFLAGS := -O2 -fomit-frame-pointer +endif + +# Flags specific to optimized and reference kernels. CVECFLAGS_VER (the compiler- and version-dependent -march selection below) is appended to both CKVECFLAGS and CRVECFLAGS. +# NOTE: The -fomit-frame-pointer option is needed for some kernels because +# they make explicit use of the rbp register. NOTE(review): with DEBUG_TYPE=noopt, COPTFLAGS is just -O0, so the option is not passed explicitly to kernels; confirm -O3 alone suffices. +CKOPTFLAGS := $(COPTFLAGS) -O3 +CROPTFLAGS := $(CKOPTFLAGS) +CKVECFLAGS := -mavx2 -mfma -mfpmath=sse +CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast +ifeq ($(CC_VENDOR),gcc) + ifeq ($(GCC_OT_6_1_0),yes) # gcc versions older than 6.1.
+ CVECFLAGS_VER := -march=bdver4 -mno-fma4 -mno-tbm -mno-xop -mno-lwp + else + ifeq ($(GCC_OT_9_1_0),yes) # gcc versions 6.1 or newer, but older than 9.1. + CVECFLAGS_VER := -march=znver1 -mno-avx256-split-unaligned-store + else # gcc versions 9.1 or newer. + CVECFLAGS_VER := -march=znver2 + endif + endif +else +ifeq ($(CC_VENDOR),clang) + ifeq ($(CLANG_OT_9_0_0),yes) # clang versions older than 9.0. + CVECFLAGS_VER := -march=znver1 + else # clang versions 9.0 or newer. + CVECFLAGS_VER := -march=znver2 + endif +else +ifeq ($(CC_VENDOR),aocc) + ifeq ($(AOCC_OT_2_0_0),yes) # aocc versions older than 2.0. + CVECFLAGS_VER := -march=znver1 -mllvm -disable-licm-vrp + else # aocc versions 2.0 or newer. + CVECFLAGS_VER := -march=znver2 + endif +else + $(error gcc, clang, or aocc is required for this configuration.) +endif +endif +endif +CKVECFLAGS += $(CVECFLAGS_VER) +CRVECFLAGS += $(CVECFLAGS_VER) + +# Store all of the variables set here into new variables whose names contain +# the configuration name (via the store-make-defs macro called below). +$(eval $(call store-make-defs,$(THIS_CONFIG))) + diff --git a/share/blis/config/zen3/bli_kernel_defs_zen3.h b/share/blis/config/zen3/bli_kernel_defs_zen3.h new file mode 100644 index 000000000..c5bc8d63f --- /dev/null +++ b/share/blis/config/zen3/bli_kernel_defs_zen3.h @@ -0,0 +1,52 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2022, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution.
+   - Neither the name(s) of the copyright holder(s) nor the names of its
+     contributors may be used to endorse or promote products derived
+     from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+*/
+
+//#ifndef BLIS_KERNEL_DEFS_H
+//#define BLIS_KERNEL_DEFS_H
+
+
+// -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ----------------------------
+
+#define BLIS_MR_s 6
+#define BLIS_MR_d 6
+#define BLIS_MR_c 3
+#define BLIS_MR_z 3
+
+#define BLIS_NR_s 16
+#define BLIS_NR_d 8
+#define BLIS_NR_c 8
+#define BLIS_NR_z 4
+
+//#endif
+
diff --git a/share/blis/config/zen3/make_defs.mk b/share/blis/config/zen3/make_defs.mk
new file mode 100644
index 000000000..0bd4ed344
--- /dev/null
+++ b/share/blis/config/zen3/make_defs.mk
@@ -0,0 +1,130 @@
+#
+#
+#  BLIS
+#  An object-based framework for developing high-performance BLAS-like
+#  libraries.
+#
+#  Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
+#
+#  Redistribution and use in source and binary forms, with or without
+#  modification, are permitted provided that the following conditions are
+#  met:
+#   - Redistributions of source code must retain the above copyright
+#     notice, this list of conditions and the following disclaimer.
+#   - Redistributions in binary form must reproduce the above copyright
+#     notice, this list of conditions and the following disclaimer in the
+#     documentation and/or other materials provided with the distribution.
+#   - Neither the name(s) of the copyright holder(s) nor the names of its
+#     contributors may be used to endorse or promote products derived
+#     from this software without specific prior written permission.
+#
+#  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#  HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+#
+
+
+# Declare the name of the current configuration and add it to the
+# running list of configurations included by common.mk.
+THIS_CONFIG    := zen3
+#CONFIGS_INCL   += $(THIS_CONFIG)
+
+#
+# --- Determine the C compiler and related flags ---
+#
+
+# NOTE: The build system will append these variables with various
+# general-purpose/configuration-agnostic flags in common.mk. You
+# may specify additional flags here as needed.
+CPPROCFLAGS    :=
+CMISCFLAGS     :=
+CPICFLAGS      := -fPIC
+CWARNFLAGS     :=
+
+ifneq ($(DEBUG_TYPE),off)
+CDBGFLAGS      := -g
+endif
+
+ifeq ($(DEBUG_TYPE),noopt)
+COPTFLAGS      := -O0
+else
+COPTFLAGS      := -O3
+endif
+
+# Flags specific to optimized and reference kernels.
+# NOTE: The -fomit-frame-pointer option is needed for some kernels because
+# they make explicit use of the rbp register.
+CKOPTFLAGS     := $(COPTFLAGS) -fomit-frame-pointer
+CROPTFLAGS     := $(CKOPTFLAGS)
+CKVECFLAGS     := -mavx2 -mfma
+CRVECFLAGS     := $(CKVECFLAGS)
+# Pick the -march target and vendor-specific extra flags for the active
+# compiler. The vendor tests are chained through 'else', so exactly one
+# vendor branch (or the final error) is evaluated.
+ifeq ($(CC_VENDOR),gcc)
+  ifeq ($(GCC_OT_9_1_0),yes)    # gcc versions older than 9.1.
+    CVECFLAGS_VER := -march=znver1 -mno-avx256-split-unaligned-store
+  else
+  ifeq ($(GCC_OT_10_3_0),yes)   # gcc versions 9.1 or newer, but older than 10.3.
+    CVECFLAGS_VER := -march=znver2
+  else                          # gcc versions 10.3 or newer.
+    CVECFLAGS_VER := -march=znver3
+  endif
+  endif
+  CKVECFLAGS += -mfpmath=sse
+  CRVECFLAGS += -funsafe-math-optimizations -ffp-contract=fast
+else
+ifeq ($(CC_VENDOR),clang)
+  ifeq ($(CLANG_OT_9_0_0),yes)  # clang versions older than 9.0.
+    CVECFLAGS_VER := -march=znver1
+  else
+  ifeq ($(CLANG_OT_12_0_0),yes) # clang versions 9.0 or newer, but older than 12.0.
+    CVECFLAGS_VER := -march=znver2
+  else
+  ifeq ($(OS_NAME),Darwin)      # clang version 12.0 on OSX lacks znver3 support
+    CVECFLAGS_VER := -march=znver2
+  else                          # clang versions 12.0 or newer.
+    CVECFLAGS_VER := -march=znver3
+  endif
+  endif
+  endif
+  CKVECFLAGS += -mfpmath=sse
+  CRVECFLAGS += -funsafe-math-optimizations -ffp-contract=fast
+else
+ifeq ($(CC_VENDOR),aocc)
+  ifeq ($(AOCC_OT_2_0_0),yes)   # aocc versions older than 2.0.
+    CVECFLAGS_VER := -march=znver1
+  else
+  ifeq ($(AOCC_OT_3_0_0),yes)   # aocc versions 2.0 or newer, but older than 3.0.
+    CVECFLAGS_VER := -march=znver2
+  else                          # aocc versions 3.0 or newer.
+    CVECFLAGS_VER := -march=znver3
+  endif
+  endif
+  CKVECFLAGS += -mfpmath=sse
+  CRVECFLAGS += -funsafe-math-optimizations -ffp-contract=fast
+else
+ifeq ($(CC_VENDOR),nvc)
+  CVECFLAGS_VER := -march=znver3
+  CRVECFLAGS += -fast
+else
+  $(error gcc, clang, nvc or aocc is required for this configuration.)
+endif
+endif
+endif
+endif
+CKVECFLAGS += $(CVECFLAGS_VER)
+CRVECFLAGS += $(CVECFLAGS_VER)
+
+# Store all of the variables here to new variables containing the
+# configuration name.
+$(eval $(call store-make-defs,$(THIS_CONFIG)))
diff --git a/share/blis/config_registry b/share/blis/config_registry
new file mode 100644
index 000000000..f30c7d835
--- /dev/null
+++ b/share/blis/config_registry
@@ -0,0 +1,70 @@
+#
+# config_registry
+#
+# Please refer to the BLIS wiki on configurations for information on the
+# syntax and semantics of this file [1].
+#
+# [1] https://github.com/flame/blis/blob/master/docs/ConfigurationHowTo.md
+#
+
+# Processor families.
+x86_64: intel64 amd64 amd64_legacy
+intel64: skx knl haswell sandybridge penryn generic
+amd64_legacy: excavator steamroller piledriver bulldozer generic
+amd64: zen3 zen2 zen generic
+arm64: armsve firestorm thunderx2 cortexa57 cortexa53 generic
+arm32: cortexa15 cortexa9 generic
+power: power10 power9 generic
+
+# Intel architectures.
+skx: skx/skx/haswell/zen
+knl: knl/knl/haswell/zen
+haswell: haswell/haswell/zen
+sandybridge: sandybridge
+penryn: penryn
+
+# AMD architectures.
+zen3: zen3/zen3/zen2/zen/haswell
+zen2: zen2/zen2/zen/haswell
+zen: zen/zen/haswell
+excavator: excavator/piledriver
+steamroller: steamroller/piledriver
+piledriver: piledriver
+bulldozer: bulldozer
+
+# ARM architectures.
+armsve: armsve/armsve
+a64fx: a64fx/armsve
+
+# ARM Neon64 (4 pipes x 128b) architectures.
+altramax: altramax/armv8a
+altra: altra/armv8a
+firestorm: firestorm/armv8a
+
+# ARM (2 pipes x 128b) architectures.
+thunderx2: thunderx2/armv8a
+cortexa57: cortexa57/armv8a
+cortexa53: cortexa53/armv8a
+
+# ARM Vintage architectures.
+cortexa15: cortexa15/armv7a
+cortexa9: cortexa9/armv7a
+
+# IBM architectures.
+power10: power10
+power9: power9
+bgq: bgq
+
+# RISC-V architectures. Added!
+rv32i: rv32i/rvi
+rv64i: rv64i/rvi
+rv32iv: rv32iv/rviv
+rv64iv: rv64iv/rviv
+rvv_sg2042: rvv_sg2042
+
+# SiFive architectures.
+sifive_rvv: sifive_rvv +sifive_x280: sifive_x280/sifive_rvv + +# Generic architectures. +generic: generic diff --git a/share/blis/configure-plugin b/share/blis/configure-plugin new file mode 100755 index 000000000..c217c4da8 --- /dev/null +++ b/share/blis/configure-plugin @@ -0,0 +1,5551 @@ +#!/usr/bin/env bash +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2014, The University of Texas at Austin +# Copyright (C) 2020-2022, Advanced Micro Devices, Inc. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name(s) of the copyright holder(s) nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# +# shellcheck disable=2001,2249,2034,2154,2181,2312,2250,2292 + +# +# -- Helper functions ---------------------------------------------------------- +# + +print_usage() +{ + # Use the version string in the 'version' file since we don't have + # the patched version string yet. + if [ -z "${version}" ]; then + version=$(<"${version_filepath}") + fi + + # Echo usage info. + cat <