Skip to content

Commit 90fa9e7

Browse files
committed
Fix xdl bug
1 parent 5ce21a3 commit 90fa9e7

File tree

1 file changed

+2
-0
lines changed

1 file changed

+2
-0
lines changed

include/ck/tensor_operation/gpu/grid/gridwise_gemm_bias_add_reduce_xdl_cshuffle_v1.hpp

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -897,6 +897,8 @@ struct GridwiseGemmBiasAddReduce_k0mk1_k0nk1_mn_xdl_cshuffle_v1
897 897   static_assert(num_access == sfc_c_global.GetNumOfAccess(), "wrong!");
898 898
899 899   static_for<0, num_access, 1>{}([&](auto access_id) {
    900 + block_sync_lds();
    901 +
900 902   // each thread write its data from VGPR to LDS
901 903   c_thread_copy_vgpr_to_lds.Run(c_thread_desc_m0_n0_m1_n1_m2_m3_m4_n2,
902 904   sfc_c_vgpr.GetIndexTupleOfNumber(access_id),

0 commit comments

Comments (0)