Skip to content

Commit 9212e3b

Browse files
committed
AOCL-5.3 GA Release
Tagging AOCL-5.3-RC3
2 parents ad6a372 + 5c1555b commit 9212e3b

148 files changed

Lines changed: 23035 additions & 8518 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

CMakeLists.txt

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
An object-based framework for developing high-performance BLAS-like
55
libraries.
66
7-
Copyright (C) 2022 - 2025, Advanced Micro Devices, Inc. All rights reserved.
7+
Copyright (C) 2022 - 2026, Advanced Micro Devices, Inc. All rights reserved.
88
99
Redistribution and use in source and binary forms, with or without
1010
modification, are permitted provided that the following conditions are
@@ -72,7 +72,7 @@ if(WIN32)
7272
else()
7373
set(BLIS_CONFIG_FAMILY "" CACHE STRING "Set the configuration family for which the BLIS library will be built.")
7474
endif()
75-
set_property(CACHE BLIS_CONFIG_FAMILY PROPERTY STRINGS "auto" "generic" "zen" "zen2" "zen3" "zen4" "zen5" "amdzen")
75+
set_property(CACHE BLIS_CONFIG_FAMILY PROPERTY STRINGS "auto" "generic" "zen" "zen2" "zen3" "zen4" "zen5" "zen6" "amdzen")
7676
# Throw an error if CMake was configured with a configuration which is not enabled yet.
7777
if(NOT ((BLIS_CONFIG_FAMILY STREQUAL auto) OR
7878
(BLIS_CONFIG_FAMILY STREQUAL generic) OR
@@ -81,10 +81,11 @@ if(NOT ((BLIS_CONFIG_FAMILY STREQUAL auto) OR
8181
(BLIS_CONFIG_FAMILY STREQUAL zen3) OR
8282
(BLIS_CONFIG_FAMILY STREQUAL zen4) OR
8383
(BLIS_CONFIG_FAMILY STREQUAL zen5) OR
84+
(BLIS_CONFIG_FAMILY STREQUAL zen6) OR
8485
(BLIS_CONFIG_FAMILY STREQUAL amdzen)))
8586
message(FATAL_ERROR "Configuration for ${BLIS_CONFIG_FAMILY} is not supported. \
8687
Please re-run cmake and specify one of the following configurations for BLIS_CONFIG_FAMILY: \
87-
auto, zen, zen2, zen3, zen4, zen5, amdzen, generic.")
88+
auto, zen, zen2, zen3, zen4, zen5, zen6, amdzen, generic.")
8889
endif()
8990

9091
# automatic hardware detection
@@ -108,7 +109,7 @@ if(BLIS_CONFIG_FAMILY STREQUAL "auto")
108109
COMPILE_DEFINITIONS -I${frame_include} -I${base_include} -I${thread_include}
109110
-DBLIS_CONFIGURETIME_CPUID -DBLIS_CONFIG_SKX -DBLIS_CONFIG_KNL
110111
-DBLIS_CONFIG_HASWELL -DBLIS_CONFIG_SANDYBRIDGE -DBLIS_CONFIG_PENRYN
111-
-DBLIS_CONFIG_ZEN5 -DBLIS_CONFIG_ZEN4 -DBLIS_CONFIG_ZEN3 -DBLIS_CONFIG_ZEN2 -DBLIS_CONFIG_ZEN
112+
-DBLIS_CONFIG_ZEN6 -DBLIS_CONFIG_ZEN5 -DBLIS_CONFIG_ZEN4 -DBLIS_CONFIG_ZEN3 -DBLIS_CONFIG_ZEN2 -DBLIS_CONFIG_ZEN
112113
-DBLIS_CONFIG_EXCAVATOR -DBLIS_CONFIG_STEAMROLLER -DBLIS_CONFIG_PILEDRIVER
113114
-DBLIS_CONFIG_BULLDOZER -DBLIS_CONFIG_THUNDERX2 -DBLIS_CONFIG_CORTEXA57
114115
-DBLIS_CONFIG_CORTEXA15 -DBLIS_CONFIG_CORTEXA9
@@ -121,7 +122,8 @@ if(BLIS_CONFIG_FAMILY STREQUAL "auto")
121122
${HARDWARE_ARCH} STREQUAL zen2 OR
122123
${HARDWARE_ARCH} STREQUAL zen3 OR
123124
${HARDWARE_ARCH} STREQUAL zen4 OR
124-
${HARDWARE_ARCH} STREQUAL zen5) )
125+
${HARDWARE_ARCH} STREQUAL zen5 OR
126+
${HARDWARE_ARCH} STREQUAL zen6) )
125127
set(BLIS_CONFIG_FAMILY "generic")
126128
message(WARNING "Only AMD zen architectures are supported. \
127129
Detected ${HARDWARE_ARCH} hardware. Defaulting to generic configuration.")
@@ -412,9 +414,8 @@ endif()
412414
#------------------------------------
413415
# Set the VERSION variable to the default value in the 'version' file.
414416
file(STRINGS ${PROJECT_SOURCE_DIR}/version VERSION)
415-
# Get timestamp.
416-
string(TIMESTAMP BUILD_DATE "%Y%m%d")
417-
# Update using the timestamp.
417+
# Use what's in the version file as-is; the UTC build date is appended to it below.
418+
string(TIMESTAMP BUILD_DATE "%Y%m%d" UTC)
418419
set(VERSION_STRING "AOCL-BLAS ${VERSION} Build ${BUILD_DATE}")
419420
# Initial message.
420421
message(STATUS "Starting configuration of BLIS ${VERSION_STRING}.")

CREDITS

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,9 @@ but many others have contributed code and feedback, including
2222
Robin Christ @robinchrist
2323
Dilyn Corner @dilyn-corner
2424
Mat Cross @matcross (NAG)
25+
Tony Davis @tony-davis
2526
@decandia50
26-
Daniël de Kok @danieldk (Explosion)
27+
Daniël de Kok @danieldk (Explosion)
2728
Kay Dewhurst @jkd2016 (Max Planck Institute, Halle, Germany)
2829
Jeff Diamond (Oracle)
2930
Johannes Dieterich @iotamudelta
@@ -68,7 +69,10 @@ but many others have contributed code and feedback, including
6869
Ilknur Mustafazade @Runkli
6970
@nagsingh
7071
Bhaskar Nallani @BhaskarNallani (AMD)
72+
Perry Naseck @DaAwesomeP
7173
Stepan Nassyr @stepannassyr (Jülich Supercomputing Centre)
74+
75+
Bart Oldeman @bartoldeman
7276
Nisanth Padinharepatt (AMD)
7377
Ajay Panyala @ajaypanyala
7478
Devangi Parikh @dnparikh (The University of Texas at Austin)

addon/aocl_gemm/aocl_gemm_bf16bf16f32obf16.c

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
An object-based framework for developing high-performance BLAS-like
55
libraries.
66
7-
Copyright (C) 2022 - 2025, Advanced Micro Devices, Inc. All rights reserved.
7+
Copyright (C) 2022 - 2026, Advanced Micro Devices, Inc. All rights reserved.
88
99
Redistribution and use in source and binary forms, with or without
1010
modification, are permitted provided that the following conditions are
@@ -235,8 +235,10 @@ AOCL_GEMM_MATMUL(bfloat16,bfloat16,bfloat16,float,bf16bf16f32obf16)
235235
* as the tiny path for BF16->FP32 is not available. Hence the arch_id also has to be
236236
* verified here.
237237
*/
238-
arch_t arch_id = bli_arch_query_id();
239-
if( ( bli_cpuid_is_avx512bf16_supported() == TRUE ) && ( ( arch_id == BLIS_ARCH_ZEN4 ) || ( arch_id == BLIS_ARCH_ZEN5 ) ) && ( is_single_thread( &rntm_g ) == TRUE) )
238+
arch_t arch_id = bli_arch_query_id_internal();
239+
if( ( bli_cpuid_is_avx512bf16_supported() == TRUE ) &&
240+
( ( arch_id == BLIS_ARCH_ZEN6 ) || ( arch_id == BLIS_ARCH_ZEN5 ) || ( arch_id == BLIS_ARCH_ZEN4 ) ) &&
241+
( is_single_thread( &rntm_g ) == TRUE ) )
240242
{
241243
if( ( is_row_major == TRUE ) &&
242244
( is_tiny_input_bf16obf16( m, n, k, lcntx_g ) == TRUE ) )

addon/aocl_gemm/aocl_gemm_bf16bf16f32of32.c

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
An object-based framework for developing high-performance BLAS-like
55
libraries.
66
7-
Copyright (C) 2022 - 2025, Advanced Micro Devices, Inc. All rights reserved.
7+
Copyright (C) 2022 - 2026, Advanced Micro Devices, Inc. All rights reserved.
88
99
Redistribution and use in source and binary forms, with or without
1010
modification, are permitted provided that the following conditions are
@@ -240,8 +240,9 @@ AOCL_GEMM_MATMUL(bfloat16,bfloat16,float,float,bf16bf16f32of32)
240240
* as the tiny path for BF16->FP32 is not available. Hence the arch_id also has to be
241241
* verified here.
242242
*/
243-
arch_t arch_id = bli_arch_query_id();
244-
if( ( bli_cpuid_is_avx512bf16_supported() == TRUE ) && ( ( arch_id == BLIS_ARCH_ZEN4 ) || ( arch_id == BLIS_ARCH_ZEN5 ) ) &&
243+
arch_t arch_id = bli_arch_query_id_internal();
244+
if( ( bli_cpuid_is_avx512bf16_supported() == TRUE ) &&
245+
( ( arch_id == BLIS_ARCH_ZEN6 ) || ( arch_id == BLIS_ARCH_ZEN5 ) || ( arch_id == BLIS_ARCH_ZEN4 ) ) &&
245246
( is_tiny_input_bf16of32( m, n, k, lcntx_g ) == TRUE ) &&
246247
( is_single_thread( &rntm_g ) == TRUE) &&
247248
( is_row_major == TRUE ) )

addon/aocl_gemm/aocl_gemm_f32f32f32of32_utils.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
An object-based framework for developing high-performance BLAS-like
55
libraries.
66
7-
Copyright (C) 2022 - 2024, Advanced Micro Devices, Inc. All rights reserved.
7+
Copyright (C) 2022 - 2026, Advanced Micro Devices, Inc. All rights reserved.
88
99
Redistribution and use in source and binary forms, with or without
1010
modification, are permitted provided that the following conditions are
@@ -38,6 +38,10 @@
3838
#include "lpgemm_utils.h"
3939
#include "lpgemm_reorder_f32.h"
4040

41+
#ifdef BLIS_ENABLE_OPENMP
42+
#include <omp.h>
43+
#endif
44+
4145
AOCL_GEMM_GET_REORDER_BUF_SIZE(f32f32f32of32)
4246
{
4347
if ( ( k <= 0 ) || ( n <= 0 ) )

addon/aocl_gemm/aocl_gemm_s8s8s32obf16_sym_quant.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,13 @@ AOCL_GEMM_MATMUL(int8_t,int8_t,bfloat16,int32_t,s8s8s32obf16_sym_quant)
180180
mtag_b = PACK;
181181
}
182182

183+
if ( post_op_unparsed == NULL )
184+
{
185+
bli_print_msg(" post_op_unparsed is NULL. Exiting..",
186+
__FILE__, __LINE__ );
187+
goto err_hndl;
188+
}
189+
183190
// convert group-level post-op struct to linked list format.
184191
lpgemm_group_post_op grp_post_op_list[AOCL_MAX_POST_OPS];
185192
err_t err = lpgemm_translate_to_group_postops_list

addon/aocl_gemm/aocl_gemm_s8s8s32of32_sym_quant.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,13 @@ AOCL_GEMM_MATMUL(int8_t,int8_t,float,int32_t,s8s8s32of32_sym_quant)
180180
mtag_b = PACK;
181181
}
182182

183+
if ( post_op_unparsed == NULL )
184+
{
185+
bli_print_msg(" post_op_unparsed is NULL. Exiting..",
186+
__FILE__, __LINE__ );
187+
goto err_hndl;
188+
}
189+
183190
// convert group-level post-op struct to linked list format.
184191
lpgemm_group_post_op grp_post_op_list[AOCL_MAX_POST_OPS];
185192
err_t err = lpgemm_translate_to_group_postops_list

addon/aocl_gemm/config/lpgemm_config.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
An object-based framework for developing high-performance BLAS-like
55
libraries.
66
7-
Copyright (C) 2022 - 2025, Advanced Micro Devices, Inc. All rights reserved.
7+
Copyright (C) 2022 - 2026, Advanced Micro Devices, Inc. All rights reserved.
88
99
Redistribution and use in source and binary forms, with or without
1010
modification, are permitted provided that the following conditions are
@@ -102,7 +102,7 @@ static bli_pthread_once_t once_check_lpgemm_func_map_init = BLIS_PTHREAD_ONCE_IN
102102

103103
static void _lpgemm_init_enable_arch()
104104
{
105-
arch_t arch_id = bli_arch_query_id();
105+
arch_t arch_id = bli_arch_query_id_internal();
106106
bool enbl_instr = bli_aocl_enable_instruction_query();
107107

108108
if ( ( enbl_instr == TRUE ) &&

addon/aocl_gemm/frame/bf16bf16f32/lpgemm_bf16.c

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -482,7 +482,7 @@ LPGEMM_5LOOP_AVX512BF16(bfloat16,bfloat16,float,bf16bf16f32of32)
482482
c_use_jc = ( float* )temp_scal_c_buffer_bf16;
483483
}else
484484
{
485-
// When k <= KC, output is written directly as there is no need of
485+
// When k <= KC, output is written directly as there is no need of
486486
// intermediate buffer to store the output
487487
c_use_jc = c + jc;
488488
}
@@ -902,7 +902,7 @@ LPGEMV_AVX2(bfloat16, bfloat16, float, bf16bf16f32of32)
902902
// Direct call to optimized GEMV conversion (K=1, contiguous output)
903903
cvt_bf16_f32_gemv_row_major
904904
(
905-
cvt_b_buffer_bf16_f32,
905+
cvt_b_buffer_bf16_f32,
906906
b, rs_b, k
907907
);
908908
}
@@ -1126,6 +1126,11 @@ LPGEMV_AVX2(bfloat16, bfloat16, float, bf16bf16f32of32)
11261126
post_op_list,
11271127
&post_ops_attr
11281128
);
1129+
1130+
if ( mtag_b == REORDERED )
1131+
{
1132+
adjust_B_panel_reordered_jc( &jc, jc_cur_loop );
1133+
}
11291134
}
11301135

11311136
if ( bli_mem_is_alloc( &mem_a ) )
@@ -1289,7 +1294,7 @@ LPGEMM_5LOOP_AVX2(bfloat16,bfloat16,float,bf16bf16f32of32)
12891294
c_use_jc = ( float* )temp_scal_c_buffer_bf16;
12901295
}else
12911296
{
1292-
// When k <= KC, output is written directly as there is no need of
1297+
// When k <= KC, output is written directly as there is no need of
12931298
// intermediate buffer to store the output
12941299
c_use_jc = c + jc;
12951300
}

addon/aocl_gemm/frame/bf16bf16f32/lpgemm_reorder_bf16.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
An object-based framework for developing high-performance BLAS-like
55
libraries.
66
7-
Copyright (C) 2022 - 2025, Advanced Micro Devices, Inc. All rights reserved.
7+
Copyright (C) 2022 - 2026, Advanced Micro Devices, Inc. All rights reserved.
88
99
Redistribution and use in source and binary forms, with or without
1010
modification, are permitted provided that the following conditions are
@@ -39,6 +39,10 @@
3939
#include "lpgemm_config.h"
4040
#include "aocl_bf16_type.h"
4141

42+
#ifdef BLIS_ENABLE_OPENMP
43+
#include <omp.h>
44+
#endif
45+
4246
void reorderb_nr64_bf16bf16f32of32_reference
4347
(
4448
lpgemm_obj_t* b,

0 commit comments

Comments
 (0)