Skip to content

Commit c12dbcf

Browse files
committed
Fixed dynamic dispatch crash issue on non-zen architecture for gemv and axpy routines.
Summary: 1. This commit fixes an issue in the gemv and axpy APIs. 2. The BLIS binary with the dynamic dispatch feature was crashing on non-zen CPUs (specifically, CPUs without AVX2 support). 3. The crash was caused by unsupported instructions in the zen-optimized kernels. The issue is fixed by calling only reference kernels if the architecture detected at runtime is not zen, zen2 or zen3. Change-Id: Icc6f7fdc80bc58fac1a97b1502b6f269e5e89aa4
1 parent 944286c commit c12dbcf

6 files changed

Lines changed: 1043 additions & 385 deletions

File tree

frame/2/gemv/bli_gemv_unf_var1.c

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,53 @@ void bli_dgemv_unf_var1
144144

145145
conja = bli_extract_conj( transa );
146146

147+
// When dynamic dispatch is enabled i.e. library is built for ‘amdzen’ configuration.
148+
// This function is invoked on all architectures including ‘generic’.
149+
// Invoke architecture specific kernels only if we are sure that we are running on zen,
150+
// zen2 or zen3 otherwise fall back to reference kernels (via framework and context).
151+
arch_t id = bli_arch_query_id();
152+
bool bamdzen = (id == BLIS_ARCH_ZEN3) || (id == BLIS_ARCH_ZEN2) || (id == BLIS_ARCH_ZEN);
153+
154+
if (bamdzen == 0)
155+
{
156+
if ( cntx == NULL ) cntx = bli_gks_query_cntx();
157+
const num_t dt = PASTEMAC(d,type);
158+
double* x1;
159+
double* y1;
160+
PASTECH(d,dotxf_ker_ft) kfp_df;
161+
/* Query the context for the kernel function pointer and fusing factor. */
162+
kfp_df = bli_cntx_get_l1f_ker_dt( dt, BLIS_DOTXF_KER, cntx );
163+
dim_t b_fuse = bli_cntx_get_blksz_def_dt( dt, BLIS_DF, cntx );
164+
165+
for ( i = 0; i < n_iter; i += f )
166+
{
167+
f = bli_determine_blocksize_dim_f( i, n_iter, b_fuse );
168+
169+
A1 = a + (i )*rs_at + (0 )*cs_at;
170+
x1 = x + (0 )*incy;
171+
y1 = y + (i )*incy;
172+
173+
/* y1 = beta * y1 + alpha * A1 * x; */
174+
kfp_df
175+
(
176+
conja,
177+
conjx,
178+
n_elem,
179+
f,
180+
alpha,
181+
A1, cs_at, rs_at,
182+
x1, incx,
183+
beta,
184+
y1, incy,
185+
cntx
186+
);
187+
188+
}
189+
190+
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_3);
191+
return;
192+
}
193+
147194
if (incx > 1)
148195
{
149196
/*
@@ -261,6 +308,51 @@ void bli_sgemv_unf_var1
261308

262309
conja = bli_extract_conj( transa );
263310

311+
// When dynamic dispatch is enabled i.e. library is built for ‘amdzen’ configuration.
312+
// This function is invoked on all architectures including ‘generic’.
313+
// Invoke architecture specific kernels only if we are sure that we are running on zen,
314+
// zen2 or zen3 otherwise fall back to reference kernels (via framework and context).
315+
arch_t id = bli_arch_query_id();
316+
bool bamdzen = (id == BLIS_ARCH_ZEN3) || (id == BLIS_ARCH_ZEN2) || (id == BLIS_ARCH_ZEN);
317+
318+
if (bamdzen == 0)
319+
{
320+
if ( cntx == NULL ) cntx = bli_gks_query_cntx();
321+
const num_t dt = PASTEMAC(s,type);
322+
float* x1 ;
323+
PASTECH(s,dotxf_ker_ft) kfp_df;
324+
/* Query the context for the kernel function pointer and fusing factor. */
325+
kfp_df = bli_cntx_get_l1f_ker_dt( dt, BLIS_DOTXF_KER, cntx );
326+
b_fuse = bli_cntx_get_blksz_def_dt( dt, BLIS_DF, cntx );
327+
328+
for ( i = 0; i < n_iter; i += f )
329+
{
330+
f = bli_determine_blocksize_dim_f( i, n_iter, b_fuse );
331+
332+
A1 = a + (i )*rs_at + (0 )*cs_at;
333+
x1 = x + (0 )*incy;
334+
y1 = y + (i )*incy;
335+
336+
/* y1 = beta * y1 + alpha * A1 * x; */
337+
kfp_df
338+
(
339+
conja,
340+
conjx,
341+
n_elem,
342+
f,
343+
alpha,
344+
A1, cs_at, rs_at,
345+
x1, incx,
346+
beta,
347+
y1, incy,
348+
cntx
349+
);
350+
351+
}
352+
353+
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_3);
354+
return;
355+
}
264356

265357
/* Query the context for the kernel function pointer and fusing factor. */
266358
b_fuse = 8;

0 commit comments

Comments
 (0)