@@ -781,11 +781,11 @@ static void emitGroupedCopies(ArrayRef<CopyInfo> copies,
781781 continue ;
782782 }
783783 // Phase 2: Emit transfers based on layout contiguity.
784- // Check if all copies in group have TileContiguous layout (benefit from
785- // loop fusion). For FullyContiguous/RowContiguous, emit separate
784+ // Check if all copies in subgroup have TileContiguous layout (benefit
785+ // from loop fusion). For FullyContiguous/RowContiguous, emit separate
786786 // optimized transfers since they don't benefit from fusion.
787787 bool allTileContiguous = true ;
788- for (const CopyInfo &info : group ) {
788+ for (const CopyInfo &info : subgroup ) {
789789 auto layoutInfo = utils::analyzeLayoutContiguity (info.tensorTy );
790790 if (layoutInfo.level != utils::ContiguityLevel::TileContiguous &&
791791 layoutInfo.level != utils::ContiguityLevel::NonContiguous) {
@@ -798,7 +798,7 @@ static void emitGroupedCopies(ArrayRef<CopyInfo> copies,
798798 // All copies are tile-contiguous: emit single fused tile loop.
799799 emitTileLoop (builder, loc, tilesY, tilesX,
800800 [&](OpBuilder &b, Location bodyLoc, Value tileOffset) {
801- for (size_t i = 0 ; i < group .size (); ++i) {
801+ for (size_t i = 0 ; i < subgroup .size (); ++i) {
802802 if (isRead) {
803803 b.create <ttk::NocAsyncReadTileOp>(
804804 bodyLoc, tileOffset, accessors[i], cbPtrs[i]);
@@ -811,9 +811,9 @@ static void emitGroupedCopies(ArrayRef<CopyInfo> copies,
811811 } else {
812812 // Some copies are fully/row contiguous: emit separate optimized
813813 // transfers (block transfers don't benefit from loop fusion).
814- for (size_t i = 0 ; i < group .size (); ++i) {
814+ for (size_t i = 0 ; i < subgroup .size (); ++i) {
815815 emitOptimizedTransfer (builder, loc, accessors[i], cbPtrs[i], isRead,
816- group [i].tensorTy , tilesY, tilesX);
816+ subgroup [i].tensorTy , tilesY, tilesX);
817817 }
818818 }
819819
0 commit comments