[GPU] Update MemDescSubsliceOp verification to handle CTA dimensions; modify test cases for clarity

Jokeren · Jokeren · commit dc2dfbcd551e · 2026-02-18T20:28:07.000-05:00
diff --git a/lib/Dialect/TritonGPU/IR/Ops.cpp b/lib/Dialect/TritonGPU/IR/Ops.cpp
@@ -1017,19 +1017,25 @@ LogicalResult MemDescSubsliceOp::verify() {
     for (auto d : standardOutDimNames(ctx, srcTy.getRank())) {
       namedOffsets.push_back({d, 0});
     }
-    for (int dimSize = dstTy.getDimSize(dim); dimSize < srcTy.getDimSize(dim);
-         dimSize *= 2) {
-      namedOffsets[dim] = {kDim, dimSize};
+    // Splitting is valid as long as all offsets in [0, dstTy.getDimSize(dim))
+    // along `dim` stay within the same CTA.
+    for (int splitOffset = 0; splitOffset < dstTy.getDimSize(dim);
+         ++splitOffset) {
+      namedOffsets[dim] = {kDim, splitOffset};
       for (auto [inDim, val] : llInv.apply(namedOffsets)) {
-        if (inDim == kOffset && !llvm::isPowerOf2_32(val)) {
-          return emitError(
-              "We don't support splitting along the swizzling pattern");
-        }
         if (inDim == kBlock && val != 0) {
           return emitError("We don't support splitting along CTA dimensions");
         }
       }
     }
+    for (int dimSize = dstTy.getDimSize(dim); dimSize < srcTy.getDimSize(dim);
+         dimSize *= 2) {
+      namedOffsets[dim] = {kDim, dimSize};
+      if (!llvm::isPowerOf2_32(llInv.apply(namedOffsets)[0].second)) {
+        return emitError(
+            "We don't support splitting along the swizzling pattern");
+      }
+    }
   }
   return success();
 }
diff --git a/test/TritonGPU/invalid.mlir b/test/TritonGPU/invalid.mlir
@@ -4,7 +4,7 @@
 #smem = #ttg.shared_memory
 module attributes {"ttg.num-ctas" = 2 : i32} {
   tt.func public @subslice_non_broadcast_cga_dim(%arg0: !ttg.memdesc<8x16xf32, #shared, #smem>) {
-      // expected-error @+1 {{non-broadcast CGA dimensions}}
+      // expected-error @+1 {{CTA dimensions}}
       %a = ttg.memdesc_subslice %arg0 [0, 0] : !ttg.memdesc<8x16xf32, #shared, #smem> -> !ttg.memdesc<8x8xf32, #shared, #smem>
       tt.return
   }
@@ -98,7 +98,7 @@ tt.func public @result_1d_to_1d(%arg0: !ttg.memdesc<8xf32, #shared, #smem>) {
 
 // -----
 
-#shared = #ttg.shared_linear<{offset = [[0, 1], [0, 2], [1, 0], [2, 0]], block = [[4, 0]]}, alignment = 16>
+#shared = #ttg.shared_linear<{offset = [[0, 1], [0, 2], [1, 0], [4, 0]], block = [[2, 0]]}, alignment = 16>
 #smem = #ttg.shared_memory
 module attributes {"ttg.num-ctas" = 2 : i32} {
   tt.func public @subview_split_on_cta_dim(%arg0: !ttg.memdesc<8x4xf32, #shared, #smem>) {

Original file line number	Diff line number	Diff line change
`@@ -1017,19 +1017,25 @@ LogicalResult MemDescSubsliceOp::verify() {`
`1017`	`1017`	`for (auto d : standardOutDimNames(ctx, srcTy.getRank())) {`
`1018`	`1018`	`namedOffsets.push_back({d, 0});`
`1019`	`1019`	`}`
`1020`		`- for (int dimSize = dstTy.getDimSize(dim); dimSize < srcTy.getDimSize(dim);`
`1021`		`- dimSize *= 2) {`
`1022`		`- namedOffsets[dim] = {kDim, dimSize};`
	`1020`	`+ // Splitting is valid as long as all offsets in [0, dstTy.getDimSize(dim))`
	`1021`	`+ // along dim stay within the same CTA.`
	`1022`	`+ for (int splitOffset = 0; splitOffset < dstTy.getDimSize(dim);`
	`1023`	`+ ++splitOffset) {`
	`1024`	`+ namedOffsets[dim] = {kDim, splitOffset};`
`1023`	`1025`	`for (auto [inDim, val] : llInv.apply(namedOffsets)) {`
`1024`		`- if (inDim == kOffset && !llvm::isPowerOf2_32(val)) {`
`1025`		`- return emitError(`
`1026`		`- "We don't support splitting along the swizzling pattern");`
`1027`		`- }`
`1028`	`1026`	`if (inDim == kBlock && val != 0) {`
`1029`	`1027`	`return emitError("We don't support splitting along CTA dimensions");`
`1030`	`1028`	`}`
`1031`	`1029`	`}`
`1032`	`1030`	`}`
	`1031`	`+ for (int dimSize = dstTy.getDimSize(dim); dimSize < srcTy.getDimSize(dim);`
	`1032`	`+ dimSize *= 2) {`
	`1033`	`+ namedOffsets[dim] = {kDim, dimSize};`
	`1034`	`+ if (!llvm::isPowerOf2_32(llInv.apply(namedOffsets)[0].second)) {`
	`1035`	`+ return emitError(`
	`1036`	`+ "We don't support splitting along the swizzling pattern");`
	`1037`	`+ }`
	`1038`	`+ }`
`1033`	`1039`	`}`
`1034`	`1040`	`return success();`
`1035`	`1041`	`}`