Skip to content

Conversation

@apaszke
Copy link
Member

@apaszke apaszke commented Jan 5, 2026

This lets us properly annotate ranges for gpu.cluster_block_id and gpu.cluster_dim_blocks. It also allows us to fill in the nvvm.cluster_dim attribute for use in the NVVM backend.

@llvmbot
Copy link
Member

llvmbot commented Jan 5, 2026

@llvm/pr-subscribers-mlir-gpu

@llvm/pr-subscribers-mlir

Author: Adam Paszke (apaszke)

Changes

This lets us properly annotate ranges for gpu.cluster_block_id and gpu.cluster_dim_blocks. It also allows us to fill in the nvvm.cluster_dim attribute for use in the NVVM backend.


Full diff: https://github.com/llvm/llvm-project/pull/174404.diff

12 Files Affected:

  • (modified) mlir/include/mlir/Dialect/GPU/IR/GPUBase.td (+2-2)
  • (modified) mlir/include/mlir/Dialect/GPU/IR/GPUOps.td (+2-1)
  • (modified) mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp (+10-1)
  • (modified) mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h (+9)
  • (modified) mlir/lib/Conversion/GPUCommon/IndexIntrinsicsOpLowering.h (+11-1)
  • (modified) mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp (+1)
  • (modified) mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp (+5-3)
  • (modified) mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp (+2-1)
  • (modified) mlir/lib/Dialect/GPU/IR/GPUDialect.cpp (+2)
  • (modified) mlir/lib/Dialect/GPU/IR/InferIntRangeInterfaceImpls.cpp (+19-1)
  • (modified) mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir (+42)
  • (added) mlir/test/Dialect/GPU/int-range-interface-cluster.mlir (+27)
diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUBase.td b/mlir/include/mlir/Dialect/GPU/IR/GPUBase.td
index 2c29bb8a01a41..f0086158fb9b6 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUBase.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUBase.td
@@ -61,10 +61,10 @@ def GPU_Dialect : Dialect {
     /// attribute with value 'workgroup`.
     static bool isWorkgroupMemoryAddressSpace(Attribute memorySpace);
   }];
-
   let discardableAttrs = (ins
     "::mlir::DenseI32ArrayAttr":$known_block_size,
-    "::mlir::DenseI32ArrayAttr":$known_grid_size
+    "::mlir::DenseI32ArrayAttr":$known_grid_size,
+    "::mlir::DenseI32ArrayAttr":$known_cluster_size
   );
 
   let dependentDialects = ["arith::ArithDialect"];
diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
index 4884541a60535..e8c23200547d6 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
@@ -431,7 +431,8 @@ def GPU_GPUFuncOp : GPU_Op<"func", [
                        OptionalAttr<DictArrayAttr>:$workgroup_attrib_attrs,
                        OptionalAttr<DictArrayAttr>:$private_attrib_attrs,
                        GPU_OptionalDimSizeHintAttr:$known_block_size,
-                       GPU_OptionalDimSizeHintAttr:$known_grid_size);
+                       GPU_OptionalDimSizeHintAttr:$known_grid_size,
+                       GPU_OptionalDimSizeHintAttr:$known_cluster_size);
   let regions = (region AnyRegion:$body);
 
   let skipDefaultBuilders = 1;
diff --git a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp
index eb662a1b056de..52dbeea829594 100644
--- a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp
+++ b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp
@@ -186,7 +186,8 @@ GPUFuncOpLowering::matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor,
         attr.getName() == gpuFuncOp.getWorkgroupAttribAttrsAttrName() ||
         attr.getName() == gpuFuncOp.getPrivateAttribAttrsAttrName() ||
         attr.getName() == gpuFuncOp.getKnownBlockSizeAttrName() ||
-        attr.getName() == gpuFuncOp.getKnownGridSizeAttrName())
+        attr.getName() == gpuFuncOp.getKnownGridSizeAttrName() ||
+        attr.getName() == gpuFuncOp.getKnownClusterSizeAttrName())
       continue;
     if (attr.getName() == gpuFuncOp.getArgAttrsAttrName()) {
       argAttrs = gpuFuncOp.getArgAttrsAttr();
@@ -197,6 +198,7 @@ GPUFuncOpLowering::matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor,
 
   DenseI32ArrayAttr knownBlockSize = gpuFuncOp.getKnownBlockSizeAttr();
   DenseI32ArrayAttr knownGridSize = gpuFuncOp.getKnownGridSizeAttr();
+  DenseI32ArrayAttr knownClusterSize = gpuFuncOp.getKnownClusterSizeAttr();
   // Ensure we don't lose information if the function is lowered before its
   // surrounding context.
   auto *gpuDialect = cast<gpu::GPUDialect>(gpuFuncOp->getDialect());
@@ -206,6 +208,9 @@ GPUFuncOpLowering::matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor,
   if (knownGridSize)
     attributes.emplace_back(gpuDialect->getKnownGridSizeAttrHelper().getName(),
                             knownGridSize);
+  if (knownClusterSize)
+    attributes.emplace_back(gpuDialect->getKnownClusterSizeAttrHelper().getName(),
+                            knownClusterSize);
 
   // Add a dialect specific kernel attribute in addition to GPU kernel
   // attribute. The former is necessary for further translation while the
@@ -217,6 +222,10 @@ GPUFuncOpLowering::matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor,
     if (kernelBlockSizeAttributeName && knownBlockSize) {
       attributes.emplace_back(kernelBlockSizeAttributeName, knownBlockSize);
     }
+    // Set the dialect-specific cluster size attribute if there is one.
+    if (kernelClusterSizeAttributeName && knownClusterSize) {
+      attributes.emplace_back(kernelClusterSizeAttributeName, knownClusterSize);
+    }
   }
   LLVM::CConv callingConvention = gpuFuncOp.isKernel()
                                       ? kernelCallingConvention
diff --git a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h
index ec74787b2a8ed..47094e91e4dcc 100644
--- a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h
+++ b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h
@@ -73,6 +73,10 @@ struct GPUFuncOpLoweringOptions {
   /// The attribute name to to set block size. Null if no attribute should be
   /// used.
   StringAttr kernelBlockSizeAttributeName;
+  /// The attribute name to set cluster size. Null if no attribute should be
+  /// used.
+  StringAttr kernelClusterSizeAttributeName;
+
 
   /// The calling convention to use for kernel functions.
   LLVM::CConv kernelCallingConvention = LLVM::CConv::C;
@@ -93,6 +97,7 @@ struct GPUFuncOpLowering : ConvertOpToLLVMPattern<gpu::GPUFuncOp> {
         workgroupAddrSpace(options.workgroupAddrSpace),
         kernelAttributeName(options.kernelAttributeName),
         kernelBlockSizeAttributeName(options.kernelBlockSizeAttributeName),
+        kernelClusterSizeAttributeName(options.kernelClusterSizeAttributeName),
         kernelCallingConvention(options.kernelCallingConvention),
         nonKernelCallingConvention(options.nonKernelCallingConvention),
         encodeWorkgroupAttributionsAsArguments(
@@ -114,6 +119,10 @@ struct GPUFuncOpLowering : ConvertOpToLLVMPattern<gpu::GPUFuncOp> {
   /// The attribute name to to set block size. Null if no attribute should be
   /// used.
   StringAttr kernelBlockSizeAttributeName;
+  /// The attribute name to set cluster size. Null if no attribute should be
+  /// used.
+  StringAttr kernelClusterSizeAttributeName;
+
 
   /// The calling convention to use for kernel functions
   LLVM::CConv kernelCallingConvention;
diff --git a/mlir/lib/Conversion/GPUCommon/IndexIntrinsicsOpLowering.h b/mlir/lib/Conversion/GPUCommon/IndexIntrinsicsOpLowering.h
index 91c43e8bd1117..ae0239132e7d0 100644
--- a/mlir/lib/Conversion/GPUCommon/IndexIntrinsicsOpLowering.h
+++ b/mlir/lib/Conversion/GPUCommon/IndexIntrinsicsOpLowering.h
@@ -17,7 +17,7 @@
 namespace mlir {
 namespace gpu {
 namespace index_lowering {
-enum class IndexKind : uint32_t { Other = 0, Block = 1, Grid = 2 };
+enum class IndexKind : uint32_t { Other = 0, Block = 1, Grid = 2, Cluster = 3 };
 enum class IntrType : uint32_t {
   None = 0,
   Id = 1,
@@ -92,6 +92,13 @@ struct OpLowering : public ConvertOpToLLVMPattern<Op> {
           funcBounds = gridHelper.getAttr(funcOp);
         break;
       }
+      case IndexKind::Cluster: {
+        auto clusterHelper =
+            gpu::GPUDialect::KnownClusterSizeAttrHelper(op.getContext());
+        if (clusterHelper.isAttrPresent(funcOp))
+          funcBounds = clusterHelper.getAttr(funcOp);
+        break;
+      }
       case IndexKind::Other:
         break;
       }
@@ -104,6 +111,9 @@ struct OpLowering : public ConvertOpToLLVMPattern<Op> {
       case IndexKind::Grid:
         funcBounds = gpuFunc.getKnownGridSizeAttr();
         break;
+      case IndexKind::Cluster:
+        funcBounds = gpuFunc.getKnownClusterSizeAttr();
+        break;
       case IndexKind::Other:
         break;
       }
diff --git a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
index c0480a1dfb512..dd05c913347ee 100644
--- a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
+++ b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
@@ -533,6 +533,7 @@ void populateGpuToLLVMSPVConversionPatterns(
       GPUFuncOpLoweringOptions{
           privateAddressSpace, localAddressSpace,
           /*kernelAttributeName=*/{}, kernelBlockSizeAttributeName,
+          /*kernelClusterSizeAttributeName=*/{},
           LLVM::CConv::SPIR_KERNEL, LLVM::CConv::SPIR_FUNC,
           /*encodeWorkgroupAttributionsAsArguments=*/true});
 }
diff --git a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
index 2561ca00d4b4f..6394296e99b9e 100644
--- a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
+++ b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
@@ -708,11 +708,11 @@ void mlir::populateGpuToNVVMConversionPatterns(
   patterns.add<gpu::index_lowering::OpLowering<
       gpu::ClusterBlockIdOp, NVVM::BlockInClusterIdXOp,
       NVVM::BlockInClusterIdYOp, NVVM::BlockInClusterIdZOp>>(
-      converter, IndexKind::Other, IntrType::Id, benefit);
+      converter, IndexKind::Cluster, IntrType::Id, benefit);
   patterns.add<gpu::index_lowering::OpLowering<
       gpu::ClusterDimBlocksOp, NVVM::ClusterDimBlocksXOp,
       NVVM::ClusterDimBlocksYOp, NVVM::ClusterDimBlocksZOp>>(
-      converter, IndexKind::Other, IntrType::Dim, benefit);
+      converter, IndexKind::Cluster, IntrType::Dim, benefit);
   patterns.add<gpu::index_lowering::OpLowering<
       gpu::BlockIdOp, NVVM::BlockIdXOp, NVVM::BlockIdYOp, NVVM::BlockIdZOp>>(
       converter, IndexKind::Grid, IntrType::Id, benefit);
@@ -737,7 +737,9 @@ void mlir::populateGpuToNVVMConversionPatterns(
           StringAttr::get(&converter.getContext(),
                           NVVM::NVVMDialect::getKernelFuncAttrName()),
           StringAttr::get(&converter.getContext(),
-                          NVVM::NVVMDialect::getMaxntidAttrName())},
+                          NVVM::NVVMDialect::getMaxntidAttrName()),
+          StringAttr::get(&converter.getContext(),
+                          NVVM::NVVMDialect::getClusterDimAttrName())},
       benefit);
 
   populateLibDeviceConversionPatterns(converter, patterns, benefit);
diff --git a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
index 51741414d2060..b8eb6d7facc6d 100644
--- a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
+++ b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
@@ -458,7 +458,8 @@ void mlir::populateGpuToROCDLConversionPatterns(
           /*allocaAddrSpace=*/ROCDL::ROCDLDialect::kPrivateMemoryAddressSpace,
           /*workgroupAddrSpace=*/ROCDL::ROCDLDialect::kSharedMemoryAddressSpace,
           rocdlDialect->getKernelAttrHelper().getName(),
-          rocdlDialect->getReqdWorkGroupSizeAttrHelper().getName()});
+          rocdlDialect->getReqdWorkGroupSizeAttrHelper().getName(),
+          /*kernelClusterSizeAttributeName=*/{}});
   if (Runtime::HIP == runtime) {
     patterns.add<GPUPrintfOpToHIPLowering>(converter);
   } else if (Runtime::OpenCL == runtime) {
diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
index 21c0d369b8d1c..36db6e82baaea 100644
--- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
+++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
@@ -396,6 +396,8 @@ LogicalResult GPUDialect::verifyOperationAttribute(Operation *op,
     return verifyKnownLaunchSizeAttr(op, attr);
   if (attr.getName() == getKnownGridSizeAttrHelper().getName())
     return verifyKnownLaunchSizeAttr(op, attr);
+  if (attr.getName() == getKnownClusterSizeAttrHelper().getName())
+    return verifyKnownLaunchSizeAttr(op, attr);
   if (!llvm::isa<UnitAttr>(attr.getValue()) ||
       attr.getName() != getContainerModuleAttrName())
     return success();
diff --git a/mlir/lib/Dialect/GPU/IR/InferIntRangeInterfaceImpls.cpp b/mlir/lib/Dialect/GPU/IR/InferIntRangeInterfaceImpls.cpp
index bee3f392c91b5..263fcb96c17db 100644
--- a/mlir/lib/Dialect/GPU/IR/InferIntRangeInterfaceImpls.cpp
+++ b/mlir/lib/Dialect/GPU/IR/InferIntRangeInterfaceImpls.cpp
@@ -30,7 +30,7 @@ static ConstantIntRanges getIndexRange(uint64_t umin, uint64_t umax) {
 }
 
 namespace {
-enum class LaunchDims : uint32_t { Block = 0, Grid = 1 };
+enum class LaunchDims : uint32_t { Block = 0, Grid = 1, Cluster = 2 };
 } // end namespace
 
 /// If the operation `op` is in a context that is annotated with maximum
@@ -63,6 +63,9 @@ getKnownLaunchAttr(GPUFuncOp func, LaunchDims dims, Dimension dim) {
   case LaunchDims::Grid:
     bounds = func.getKnownGridSizeAttr();
     break;
+  case LaunchDims::Cluster:
+    bounds = func.getKnownClusterSizeAttr();
+    break;
   }
   if (!bounds)
     return std::nullopt;
@@ -94,6 +97,13 @@ static std::optional<uint64_t> getKnownLaunchDim(Op op, LaunchDims type) {
     case LaunchDims::Grid:
       bounds = launch.getGridSizeOperandValues();
       break;
+    case LaunchDims::Cluster:
+      if (launch.hasClusterSize()) {
+        auto clusterBounds = launch.getClusterSizeOperandValues();
+        if (clusterBounds)
+          bounds = *clusterBounds;
+      }
+      break;
     }
     Value maybeBound = valueByDim(bounds, dim);
     APInt value;
@@ -115,6 +125,9 @@ static std::optional<uint64_t> getKnownLaunchDim(Op op, LaunchDims type) {
     case LaunchDims::Grid:
       attrName = GPUDialect::KnownGridSizeAttrHelper::getNameStr();
       break;
+    case LaunchDims::Cluster:
+      attrName = GPUDialect::KnownClusterSizeAttrHelper::getNameStr();
+      break;
     }
     auto discardableAttr = getKnownLaunchAttr(func, attrName, dim);
     if (discardableAttr)
@@ -133,6 +146,9 @@ void ClusterDimOp::inferResultRanges(ArrayRef<ConstantIntRanges>,
 
 void ClusterDimBlocksOp::inferResultRanges(ArrayRef<ConstantIntRanges>,
                                            SetIntRangeFn setResultRange) {
+  if (auto known = getKnownLaunchDim(*this, LaunchDims::Cluster))
+    return setResultRange(getResult(), getIndexRange(*known, *known));
+
   uint64_t max = kMaxClusterDim;
   if (auto specified = getUpperBound())
     max = specified->getZExtValue();
@@ -150,6 +166,8 @@ void ClusterIdOp::inferResultRanges(ArrayRef<ConstantIntRanges>,
 void ClusterBlockIdOp::inferResultRanges(ArrayRef<ConstantIntRanges>,
                                          SetIntRangeFn setResultRange) {
   uint64_t max = kMaxClusterDim;
+  if (auto known = getKnownLaunchDim(*this, LaunchDims::Cluster))
+    max = *known;
   if (auto specified = getUpperBound())
     max = specified->getZExtValue();
   setResultRange(getResult(), getIndexRange(0, max - 1ULL));
diff --git a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
index f1cc1eb983267..55ee508aa9f55 100644
--- a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
+++ b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
@@ -1149,3 +1149,45 @@ gpu.module @test_module_56 {
     func.return %sin16, %cos16, %sin32, %cos32, %sin64, %cos64 : f16, f16, f32, f32, f64, f64
   }
 }
+
+// -----
+
+gpu.module @test_module_cluster_size {
+  // CHECK-LABEL: llvm.func @kernel_with_cluster_size()
+  // CHECK-SAME: nvvm.cluster_dim = array<i32: 8, 2, 4>
+  gpu.func @kernel_with_cluster_size() kernel attributes {known_cluster_size = array<i32: 8, 2, 4>} {
+    gpu.return
+  }
+}
+
+// -----
+
+gpu.module @test_module_cluster_block_ops {
+// CHECK-LABEL: llvm.func @kernel_with_cluster_size(
+// CHECK-SAME: %[[ARG0:.*]]: !llvm.ptr)
+// CHECK-SAME: gpu.known_cluster_size = array<i32: 8, 4, 2>
+  gpu.func @kernel_with_cluster_size(%arg0: !llvm.ptr) kernel attributes {known_cluster_size = array<i32: 8, 4, 2>} {
+    // CHECK: nvvm.read.ptx.sreg.cluster.ctaid.x range <i32, 0, 8> : i32
+    %0 = gpu.cluster_block_id x
+    // CHECK: nvvm.read.ptx.sreg.cluster.ctaid.y range <i32, 0, 4> : i32
+    %1 = gpu.cluster_block_id y
+    // CHECK: nvvm.read.ptx.sreg.cluster.ctaid.z range <i32, 0, 2> : i32
+    %2 = gpu.cluster_block_id z
+    // CHECK: nvvm.read.ptx.sreg.cluster.nctaid.x range <i32, 1, 9> : i32
+    %3 = gpu.cluster_dim_blocks x
+    // CHECK: nvvm.read.ptx.sreg.cluster.nctaid.y range <i32, 1, 5> : i32
+    %4 = gpu.cluster_dim_blocks y
+    // CHECK: nvvm.read.ptx.sreg.cluster.nctaid.z range <i32, 1, 3> : i32
+    %5 = gpu.cluster_dim_blocks z
+
+    %6 = arith.addi %0, %1 : index
+    %7 = arith.addi %6, %2 : index
+    %8 = arith.addi %7, %3 : index
+    %9 = arith.addi %8, %4 : index
+    %10 = arith.addi %9, %5 : index
+    %11 = arith.index_cast %10 : index to i64
+    llvm.store %11, %arg0 : i64, !llvm.ptr
+    gpu.return
+  }
+}
+
diff --git a/mlir/test/Dialect/GPU/int-range-interface-cluster.mlir b/mlir/test/Dialect/GPU/int-range-interface-cluster.mlir
new file mode 100644
index 0000000000000..a7dd0df2e2c13
--- /dev/null
+++ b/mlir/test/Dialect/GPU/int-range-interface-cluster.mlir
@@ -0,0 +1,27 @@
+// RUN: mlir-opt -int-range-optimizations %s | FileCheck %s
+ 
+gpu.module @test_module {
+  gpu.func @test_cluster_ranges() kernel attributes {known_cluster_size = array<i32: 8, 4, 1>} {
+    %c0 = gpu.cluster_block_id x
+    // CHECK: test.reflect_bounds {smax = 7 : index, smin = 0 : index, umax = 7 : index, umin = 0 : index}
+    %c0_0 = test.reflect_bounds %c0 : index
+    %c1 = gpu.cluster_block_id y
+    // CHECK: test.reflect_bounds {smax = 3 : index, smin = 0 : index, umax = 3 : index, umin = 0 : index}
+    %c1_0 = test.reflect_bounds %c1 : index
+    %c2 = gpu.cluster_block_id z
+    // CHECK: test.reflect_bounds {smax = 0 : index, smin = 0 : index, umax = 0 : index, umin = 0 : index}
+    %c2_0 = test.reflect_bounds %c2 : index
+
+    %d0 = gpu.cluster_dim_blocks x
+    // CHECK: test.reflect_bounds {smax = 8 : index, smin = 8 : index, umax = 8 : index, umin = 8 : index}
+    %d0_0 = test.reflect_bounds %d0 : index
+    %d1 = gpu.cluster_dim_blocks y
+    // CHECK: test.reflect_bounds {smax = 4 : index, smin = 4 : index, umax = 4 : index, umin = 4 : index}
+    %d1_0 = test.reflect_bounds %d1 : index
+    %d2 = gpu.cluster_dim_blocks z
+    // CHECK: test.reflect_bounds {smax = 1 : index, smin = 1 : index, umax = 1 : index, umin = 1 : index}
+    %d2_0 = test.reflect_bounds %d2 : index
+
+    gpu.return
+  }
+}

@github-actions
Copy link

github-actions bot commented Jan 5, 2026

✅ With the latest revision this PR passed the C/C++ code formatter.

This lets us properly annotate ranges for gpu.cluster_block_id and gpu.cluster_dim_blocks.
It also allows us to fill in the nvvm.cluster_dim attribute for use in the NVVM backend.
@apaszke apaszke force-pushed the cluster-size-ranges branch from 0fdb97a to 4a77a2a Compare January 5, 2026 13:47
@jpienaar jpienaar merged commit 9a93769 into llvm:main Jan 6, 2026
10 checks passed
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

4 participants