@@ -144,6 +144,53 @@ void bli_dgemv_unf_var1
144144
145145 conja = bli_extract_conj ( transa );
146146
147+ // When dynamic dispatch is enabled, i.e. the library is built for the 'amdzen' configuration,
148+ // this function is invoked on all architectures, including 'generic'.
149+ // Invoke architecture-specific kernels only if we are sure that we are running on zen,
150+ // zen2 or zen3; otherwise, fall back to reference kernels (via framework and context).
151+ arch_t id = bli_arch_query_id ();
152+ bool bamdzen = (id == BLIS_ARCH_ZEN3 ) || (id == BLIS_ARCH_ZEN2 ) || (id == BLIS_ARCH_ZEN );
153+
154+ if (bamdzen == 0 )
155+ {
156+ if ( cntx == NULL ) cntx = bli_gks_query_cntx ();
157+ const num_t dt = PASTEMAC (d ,type );
158+ double * x1 ;
159+ double * y1 ;
160+ PASTECH (d ,dotxf_ker_ft ) kfp_df ;
161+ /* Query the context for the kernel function pointer and fusing factor. */
162+ kfp_df = bli_cntx_get_l1f_ker_dt ( dt , BLIS_DOTXF_KER , cntx );
163+ dim_t b_fuse = bli_cntx_get_blksz_def_dt ( dt , BLIS_DF , cntx );
164+
165+ for ( i = 0 ; i < n_iter ; i += f )
166+ {
167+ f = bli_determine_blocksize_dim_f ( i , n_iter , b_fuse );
168+
169+ A1 = a + (i )* rs_at + (0 )* cs_at ;
170+ x1 = x + (0 )* incy ;
171+ y1 = y + (i )* incy ;
172+
173+ /* y1 = beta * y1 + alpha * A1 * x; */
174+ kfp_df
175+ (
176+ conja ,
177+ conjx ,
178+ n_elem ,
179+ f ,
180+ alpha ,
181+ A1 , cs_at , rs_at ,
182+ x1 , incx ,
183+ beta ,
184+ y1 , incy ,
185+ cntx
186+ );
187+
188+ }
189+
190+ AOCL_DTL_TRACE_EXIT (AOCL_DTL_LEVEL_TRACE_3 );
191+ return ;
192+ }
193+
147194 if (incx > 1 )
148195 {
149196 /*
@@ -261,6 +308,51 @@ void bli_sgemv_unf_var1
261308
262309 conja = bli_extract_conj ( transa );
263310
311+ // When dynamic dispatch is enabled, i.e. the library is built for the 'amdzen' configuration,
312+ // this function is invoked on all architectures, including 'generic'.
313+ // Invoke architecture-specific kernels only if we are sure that we are running on zen,
314+ // zen2 or zen3; otherwise, fall back to reference kernels (via framework and context).
315+ arch_t id = bli_arch_query_id ();
316+ bool bamdzen = (id == BLIS_ARCH_ZEN3 ) || (id == BLIS_ARCH_ZEN2 ) || (id == BLIS_ARCH_ZEN );
317+
318+ if (bamdzen == 0 )
319+ {
320+ if ( cntx == NULL ) cntx = bli_gks_query_cntx ();
321+ const num_t dt = PASTEMAC (s ,type );
322+ float * x1 ;
323+ PASTECH (s ,dotxf_ker_ft ) kfp_df ;
324+ /* Query the context for the kernel function pointer and fusing factor. */
325+ kfp_df = bli_cntx_get_l1f_ker_dt ( dt , BLIS_DOTXF_KER , cntx );
326+ b_fuse = bli_cntx_get_blksz_def_dt ( dt , BLIS_DF , cntx );
327+
328+ for ( i = 0 ; i < n_iter ; i += f )
329+ {
330+ f = bli_determine_blocksize_dim_f ( i , n_iter , b_fuse );
331+
332+ A1 = a + (i )* rs_at + (0 )* cs_at ;
333+ x1 = x + (0 )* incy ;
334+ y1 = y + (i )* incy ;
335+
336+ /* y1 = beta * y1 + alpha * A1 * x; */
337+ kfp_df
338+ (
339+ conja ,
340+ conjx ,
341+ n_elem ,
342+ f ,
343+ alpha ,
344+ A1 , cs_at , rs_at ,
345+ x1 , incx ,
346+ beta ,
347+ y1 , incy ,
348+ cntx
349+ );
350+
351+ }
352+
353+ AOCL_DTL_TRACE_EXIT (AOCL_DTL_LEVEL_TRACE_3 );
354+ return ;
355+ }
264356
265357 /* Query the context for the kernel function pointer and fusing factor. */
266358 b_fuse = 8 ;
0 commit comments