[CONSAN] Handle memdesc selects in buffer region analysis (#10031)

pawelszczerbuk · root · web-flow · commit 12138f436a37 · 2026-04-15T10:07:53.000-07:00
`arith.select` ops that choose between different memdescs were not
handled by the BufferRegion analysis at all. After moving ConSan to the
LLVM lowering, we started hitting this case.

Co-authored-by: root <root@codex-gb201-0.brix.pawelszczerbuk.svc.cluster.local>
diff --git a/lib/Analysis/BufferRegion.cpp b/lib/Analysis/BufferRegion.cpp
@@ -1,5 +1,6 @@
 #include "triton/Analysis/BufferRegion.h"
 #include "mlir/Analysis/DataFlow/DeadCodeAnalysis.h"
+#include "mlir/Dialect/Arith/IR/Arith.h"
 #include "triton/Dialect/Triton/IR/Utility.h"
 #include "triton/Dialect/TritonGPU/IR/Dialect.h"
 #include "triton/Dialect/TritonGPU/IR/LinearLayoutConversions.h"
@@ -267,6 +268,16 @@ LogicalResult BufferRegionAnalysis::visitOperation(
     }
     return success();
   }
+  if (auto selectOp = dyn_cast<arith::SelectOp>(op)) {
+    if (isa<ttg::MemDescType>(selectOp.getType())) {
+      regionInfo =
+          RegionInfo::join(operands[1]->getValue(), operands[2]->getValue());
+      for (auto *r : results) {
+        propagateIfChanged(r, r->join(regionInfo));
+      }
+      return success();
+    }
+  }
   // "Passthrough" ops that don't modify the buffer regions.
   if (isa<ttg::MemDescTransOp, ttg::MemDescReshapeOp,
           ttg::MemDescReinterpretOp>(op)) {
diff --git a/test/Analysis/test-buffer-region.mlir b/test/Analysis/test-buffer-region.mlir
@@ -395,6 +395,48 @@ module attributes {"ttg.num-ctas" = 1 : i32, "ttg.num-warps" = 1 : i32, ttg.shar
 
 // -----
 
+#shared = #ttg.nvmma_shared<{swizzlingByteWidth = 128, transposed = false, elementBitWidth = 32}>
+#smem = #ttg.shared_memory
+#blocked = #ttg.blocked<{sizePerThread = [1, 32], threadsPerWarp = [32, 1], warpsPerCTA = [1, 1], order = [0, 1]}>
+
+module attributes {"ttg.num-ctas" = 1 : i32, "ttg.num-warps" = 1 : i32, ttg.shared = 65544 : i32, ttg.target = "cuda:90", ttg.tensor_memory_size = 0 : i32, "ttg.threads-per-warp" = 32 : i32, "ttg.total-num-warps" = 1 : i32} {
+  tt.func public @select_shared_memory_regions(%cond: i1) {
+    %alloc_a = ttg.local_alloc {allocation.offset = 57344 : i32} : () -> !ttg.memdesc<32x32xf32, #shared, #smem, mutable>
+    %alloc_b = ttg.local_alloc {allocation.offset = 61440 : i32} : () -> !ttg.memdesc<32x32xf32, #shared, #smem, mutable>
+    %selected = arith.select %cond, %alloc_a, %alloc_b : !ttg.memdesc<32x32xf32, #shared, #smem, mutable>
+    // expected-remark @below {{Buffers: [57344, 4096], [61440, 4096]}}
+    ttg.local_load %selected : !ttg.memdesc<32x32xf32, #shared, #smem, mutable> -> tensor<32x32xf32, #blocked>
+    tt.return
+  }
+
+  // expected-remark @below {{All Shared Regions: [57344, 4096], [61440, 4096]}}
+  tt.func private @print_all_regions() attributes {test.print_all_used_regions} {
+    tt.return
+  }
+}
+
+// -----
+
+#tmem = #ttng.tensor_memory_encoding<blockM = 128, blockN = 128, colStride = 1>
+
+module attributes {"ttg.num-ctas" = 1 : i32, "ttg.num-warps" = 1 : i32, ttg.shared = 65544 : i32, ttg.target = "cuda:90", ttg.tensor_memory_size = 0 : i32, "ttg.threads-per-warp" = 32 : i32, "ttg.total-num-warps" = 1 : i32} {
+  tt.func public @select_tensor_memory_regions(%cond: i1) {
+    %tm0 = ttng.tmem_alloc {tensor_memory_col_offset = 0 : i32, tensor_memory_row_offset = 0 : i32} : () -> !ttg.memdesc<128x128xf32, #tmem, #ttng.tensor_memory, mutable>
+    %tm1 = ttng.tmem_alloc {tensor_memory_col_offset = 128 : i32, tensor_memory_row_offset = 0 : i32} : () -> !ttg.memdesc<128x128xf32, #tmem, #ttng.tensor_memory, mutable>
+    %selected = arith.select %cond, %tm0, %tm1 : !ttg.memdesc<128x128xf32, #tmem, #ttng.tensor_memory, mutable>
+    // expected-remark @below {{Buffers: [0, 128], [128, 128]}}
+    ttng.tmem_load %selected : !ttg.memdesc<128x128xf32, #tmem, #ttng.tensor_memory, mutable> -> tensor<128x128xf32>
+    tt.return
+  }
+
+  // expected-remark @below {{All Tensor Regions: [0, 128], [128, 128]}}
+  tt.func private @print_all_regions() attributes {test.print_all_used_regions} {
+    tt.return
+  }
+}
+
+// -----
+
 #shared = #ttg.nvmma_shared<{swizzlingByteWidth = 128, transposed = false, elementBitWidth = 32}>
 #shared1 = #ttg.swizzled_shared<{vec = 1, perPhase = 1, maxPhase = 1, order = [0]}>
 #smem = #ttg.shared_memory