[Backend] Bump to llvm/llvm-project@ac5dc54d5091 (triton-lang#9333)

antiagainst · paultrojahnamd · commit dc8d49e0a261 · 2026-03-19T20:52:02.000+01:00
diff --git a/.github/workflows/llvm-build.yml b/.github/workflows/llvm-build.yml
@@ -104,6 +104,7 @@ jobs:
         sudo apt-get clean
         df -h
         echo "Removing large directories"
+        # deleting 15GB
         df -h
 
     - name: Configure, Build, Test, and Install LLVM (Ubuntu and macOS x64)
@@ -214,6 +215,8 @@ jobs:
         -DCMAKE_RANLIB="/usr/bin/aarch64-linux-gnu-ranlib" \
         -DCMAKE_STRIP="/usr/bin/aarch64-linux-gnu-strip" \
         -DCMAKE_SYSROOT=$SYSROOT \
+        -DLLVM_INCLUDE_TESTS=OFF \
+        -DMLIR_INCLUDE_TESTS=OFF \
         -DLLVM_ENABLE_TERMINFO=OFF \
         llvm-project/llvm
         ninja -C llvm-project/build install
diff --git a/cmake/llvm-hash.txt b/cmake/llvm-hash.txt
@@ -1 +1 @@
-2eb709b95d8f521aa15401e159fac0729d56a677
+ac5dc54d509169d387fcfd495d71853d81c46484
diff --git a/include/triton/Analysis/Alias.h b/include/triton/Analysis/Alias.h
@@ -92,9 +92,8 @@ class SharedMemoryAliasAnalysis
 
   void visitNonControlFlowArguments(
       Operation *op, const RegionSuccessor &successor,
-      ValueRange successorInputs,
-      ArrayRef<dataflow::Lattice<AliasInfo> *> argLattices,
-      unsigned firstIndex) override;
+      ValueRange nonSuccessorInputs,
+      ArrayRef<dataflow::Lattice<AliasInfo> *> nonSuccessorInputLattices) override;
 };
 
 } // namespace mlir
diff --git a/lib/Analysis/Alias.cpp b/lib/Analysis/Alias.cpp
@@ -60,13 +60,11 @@ LogicalResult SharedMemoryAliasAnalysis::visitOperation(
 
 void SharedMemoryAliasAnalysis::visitNonControlFlowArguments(
     Operation *op, const RegionSuccessor &successor,
-    ValueRange successorInputs,
-    ArrayRef<dataflow::Lattice<AliasInfo> *> argLattices, unsigned firstIndex) {
+    ValueRange nonSuccessorInputs,
+    ArrayRef<dataflow::Lattice<AliasInfo> *> argLattices) {
   auto wsOp = dyn_cast<triton::gpu::WarpSpecializePartitionsOp>(op);
   if (!wsOp) {
-    setAllToEntryStates(argLattices.take_front(firstIndex));
-    setAllToEntryStates(argLattices.drop_front(
-        firstIndex + successorInputs.size()));
+    setAllToEntryStates(argLattices);
     return;
   }
 
diff --git a/lib/Analysis/AxisInfo.cpp b/lib/Analysis/AxisInfo.cpp
@@ -146,17 +146,14 @@ class AxisInfoAnalysis : public dataflow::SparseForwardDataFlowAnalysis<
 
   void visitNonControlFlowArguments(
       Operation *op, const RegionSuccessor &successor,
-      ValueRange successorInputs,
-      ArrayRef<dataflow::Lattice<AxisInfo> *> argLattices,
-      unsigned firstIndex) override {
+      ValueRange /*nonSuccessorInputs*/,
+      ArrayRef<dataflow::Lattice<AxisInfo> *> argLattices) override {
     if (auto forOp = dyn_cast<scf::ForOp>(op)) {
       visitForOpInductionVar(forOp, argLattices);
     } else if (auto ws = dyn_cast<gpu::WarpSpecializePartitionsOp>(op)) {
       visitWarpSpecializeExplicitCaptures(ws, successor, argLattices);
     } else {
-      setAllToEntryStates(argLattices.take_front(firstIndex));
-      setAllToEntryStates(
-          argLattices.drop_front(firstIndex + successorInputs.size()));
+      setAllToEntryStates(argLattices);
     }
   }
 
diff --git a/lib/Target/LLVMIR/LLVMDIUtils.cpp b/lib/Target/LLVMIR/LLVMDIUtils.cpp
@@ -0,0 +1,160 @@
+#include "lib/Target/LLVMIR/LLVMDIUtils.h"
+#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
+#include "mlir/IR/BuiltinAttributes.h"
+#include "mlir/IR/BuiltinTypes.h"
+#include "mlir/IR/Location.h"
+#include "mlir/IR/MLIRContext.h"
+#include "mlir/IR/Types.h"
+#include "llvm/BinaryFormat/Dwarf.h"
+
+namespace mlir {
+
+// Note: mlir does not provide any built-in conversion from mlir::Type to
+// mlir::LLVM::DITypeAttr
+LLVM::DITypeAttr LLVMDIUtils::convertType(MLIRContext *context,
+                                          mlir::Type type) {
+  // Every result is a DW_TAG_base_type entry that differs only in its name,
+  // bit size and DWARF encoding, so they are all built by one local helper.
+  auto makeBasicType = [context](llvm::StringRef name, uint64_t sizeInBits,
+                                 unsigned encoding) -> LLVM::DITypeAttr {
+    return LLVM::DIBasicTypeAttr::get(context, llvm::dwarf::DW_TAG_base_type,
+                                      mlir::StringAttr::get(context, name),
+                                      sizeInBits, encoding);
+  };
+
+  // i1 is tested before the generic integer case so that it is named "bool".
+  if (type.isInteger(1))
+    return makeBasicType("bool", type.getIntOrFloatBitWidth(),
+                         llvm::dwarf::DW_ATE_boolean);
+
+  // Every other integer width is described as a signed "int".
+  if (type.isInteger())
+    return makeBasicType("int", type.getIntOrFloatBitWidth(),
+                         llvm::dwarf::DW_ATE_signed);
+
+  // f16 / f32 / f64 map to "half" / "float" / "double".
+  if (type.isF16())
+    return makeBasicType("half", type.getIntOrFloatBitWidth(),
+                         llvm::dwarf::DW_ATE_float);
+  if (type.isF32())
+    return makeBasicType("float", type.getIntOrFloatBitWidth(),
+                         llvm::dwarf::DW_ATE_float);
+  if (type.isF64())
+    return makeBasicType("double", type.getIntOrFloatBitWidth(),
+                         llvm::dwarf::DW_ATE_float);
+
+  // A vector is emitted as one "vector" base type sized by calcBitWidth.
+  // TODO: when the element size cannot be determined this falls through to
+  // "unknown_type"; perhaps there's a better way to handle that case.
+  if (mlir::isa<mlir::VectorType>(type)) {
+    if (auto vectorTypeSize = calcBitWidth(type); vectorTypeSize.has_value())
+      return makeBasicType("vector", vectorTypeSize.value(),
+                           llvm::dwarf::DW_ATE_float);
+  }
+
+  // Fallback for any type not recognised above: zero-sized "unknown_type".
+  return makeBasicType("unknown_type", 0, llvm::dwarf::DW_ATE_signed);
+}
+
+LLVM::DITypeAttr LLVMDIUtils::convertPtrType(MLIRContext *context,
+                                             LLVM::LLVMPointerType pointerType,
+                                             mlir::Type pointeeType,
+                                             DataLayout datalayout) {
+  // An LLVMPointerType carries no pointee information, which is why the
+  // pointee type is supplied separately by the caller.
+  LLVM::DITypeAttr pointeeDIType = convertType(context, pointeeType);
+  unsigned pointerBits = datalayout.getTypeSizeInBits(pointerType);
+  unsigned addressSpace = pointerType.getAddressSpace();
+
+  // Return the derived-type attribute directly; implicit upcast to DITypeAttr.
+  return mlir::LLVM::DIDerivedTypeAttr::get(
+      context, llvm::dwarf::DW_TAG_pointer_type,
+      mlir::StringAttr::get(context, "pointer"), pointeeDIType, pointerBits,
+      /*alignInBits=*/0, /*offset=*/0, addressSpace, mlir::LLVM::DIFlags::Zero,
+      /*extra data=*/nullptr);
+}
+
+LLVM::DITypeAttr LLVMDIUtils::convertStructType(MLIRContext *context,
+                                                LLVM::LLVMStructType structType,
+                                                LLVM::DIFileAttr fileAttr,
+                                                DataLayout datalayout,
+                                                int64_t line) {
+  // Packed and identified (named) structs are rejected up front.
+  assert(!structType.isPacked() && !structType.isIdentified() &&
+         "Only accepts NON-Packed and Literal struct type");
+
+  // Convert each member type in order; the member index is not needed.
+  SmallVector<LLVM::DINodeAttr> elTypes;
+  for (mlir::Type element : structType.getBody())
+    elTypes.push_back(convertType(context, element));
+
+  // fileAttr is passed as both the file and the scope of the composite type.
+  unsigned sizeInBits = datalayout.getTypeSizeInBits(structType);
+  return LLVM::DICompositeTypeAttr::get(
+      context, llvm::dwarf::DW_TAG_structure_type,
+      mlir::StringAttr::get(context, "struct"), fileAttr, /*line=*/line,
+      /*scope=*/fileAttr, /*baseType=*/nullptr, mlir::LLVM::DIFlags::Zero,
+      sizeInBits, /*alignInBits=*/0, /*dataLocation=*/nullptr, /*rank=*/nullptr,
+      /*allocated=*/nullptr, /*associated=*/nullptr, elTypes);
+}
+
+LLVM::DITypeAttr LLVMDIUtils::convertArrayType(MLIRContext *context,
+                                               LLVM::LLVMArrayType arrayType,
+                                               LLVM::DIFileAttr fileAttr,
+                                               DataLayout datalayout,
+                                               int64_t line) {
+  // Convert the element type once and reuse it for both the base type and
+  // the per-element list (one entry for each array element).
+  LLVM::DITypeAttr baseType = convertType(context, arrayType.getElementType());
+  SmallVector<LLVM::DINodeAttr> elTypes(arrayType.getNumElements(), baseType);
+  unsigned sizeInBits = datalayout.getTypeSizeInBits(arrayType);
+
+  // fileAttr is passed as both the file and the scope of the composite type.
+  return LLVM::DICompositeTypeAttr::get(
+      context, llvm::dwarf::DW_TAG_array_type,
+      mlir::StringAttr::get(context, "array"), fileAttr, /*line=*/line,
+      /*scope=*/fileAttr, /*baseType=*/baseType, mlir::LLVM::DIFlags::Zero,
+      sizeInBits, /*alignInBits=*/0, /*dataLocation=*/nullptr, /*rank=*/nullptr,
+      /*allocated=*/nullptr, /*associated=*/nullptr, elTypes);
+}
+
+std::optional<unsigned> LLVMDIUtils::calcBitWidth(mlir::Type type) {
+  // Scalars (integer or float) report their own width.
+  if (type.isIntOrFloat())
+    return type.getIntOrFloatBitWidth();
+
+  // Anything that is neither a scalar nor a vector has no known width.
+  auto vectorType = dyn_cast<mlir::VectorType>(type);
+  if (!vectorType)
+    return std::nullopt;
+
+  // Vector width = element count * element width. Scalable dims are not
+  // consulted, so for scalable vectors this uses the static shape only.
+  std::optional<unsigned> elementTypeSize =
+      calcBitWidth(vectorType.getElementType());
+  if (!elementTypeSize.has_value())
+    return std::nullopt;
+  unsigned size = 1;
+  for (int64_t dim : vectorType.getShape())
+    size *= dim;
+  return size * elementTypeSize.value();
+}
+
+/// Resolve loc to a FileLineColLoc, or "<unknown>":0:0 when none is found.
+FileLineColLoc LLVMDIUtils::extractFileLoc(Location loc, bool getCaller) {
+  if (auto fileLoc = dyn_cast<FileLineColLoc>(loc)) // already a file location
+    return fileLoc;
+  if (auto nameLoc = dyn_cast<NameLoc>(loc))
+    return extractFileLoc(nameLoc.getChildLoc()); // getCaller not forwarded
+  if (auto opaqueLoc = dyn_cast<OpaqueLoc>(loc))
+    return extractFileLoc(opaqueLoc.getFallbackLocation());
+  if (auto fusedLoc = dyn_cast<FusedLoc>(loc)) // only the first fused location
+    return extractFileLoc(fusedLoc.getLocations().front());
+  if (auto callerLoc = dyn_cast<CallSiteLoc>(loc)) // caller or callee side
+    return getCaller ? extractFileLoc(callerLoc.getCaller())
+                     : extractFileLoc(callerLoc.getCallee());
+  StringAttr unknownFile = mlir::StringAttr::get(loc.getContext(), "<unknown>");
+  return mlir::FileLineColLoc::get(unknownFile, 0, 0); // nothing matched
+}
+
+} // namespace mlir
diff --git a/python/test/unit/runtime/test_autotuner.py b/python/test/unit/runtime/test_autotuner.py
@@ -6,7 +6,7 @@
 
 import pathlib
 import uuid
-from triton._internal_testing import is_cuda
+from triton._internal_testing import is_cuda, is_hip_cdna2
 
 
 def do_bench(kernel_call, quantiles, use_cuda_graph=False):
@@ -84,6 +84,7 @@ def _kernel(src, N, BLOCK_SIZE: tl.constexpr):
     triton.testing.assert_close(src, torch.ones_like(src))
 
 
+@pytest.mark.skipif(is_hip_cdna2(), reason="Hit LLVM assertion in splitLiveThroughBlock")
 def test_hooks(device):
     # Autotuner's pre- and post- hooks should be called the same number of times
     N = 4096
diff --git a/test/Conversion/amd/mbarrier_ops_to_llvm_gfx1250.mlir b/test/Conversion/amd/mbarrier_ops_to_llvm_gfx1250.mlir
@@ -8,7 +8,10 @@ module attributes {"ttg.target" = "hip:gfx1250", "ttg.num-ctas" = 1 : i32, "ttg.
     // GFX1250: %[[INIT_VAL1:.+]] = llvm.mlir.constant(4294967297 : i64) : i64
     // GFX1250: %[[ALLOC_PTR:.+]] = llvm.extractvalue %arg0[0] : !llvm.struct<(ptr<3>, i32)>
     // GFX1250: llvm.store %[[INIT_VAL1]], %[[ALLOC_PTR]] : i64, !llvm.ptr<3>
-    // GFX1250: rocdl.barrier
+    // GFX1250: llvm.fence syncscope("workgroup") release
+    // GFX1250: rocdl.s.barrier.signal{{.*}}
+    // GFX1250: rocdl.s.barrier.wait{{.*}}
+    // GFX1250: llvm.fence syncscope("workgroup") acquire
     amdg.init_barrier %alloc, 2 : !ttg.memdesc<1xi64, #shared, #smem, mutable>
     tt.return
   }
diff --git a/test/TritonGPU/amd/amd-range-analysis.mlir b/test/TritonGPU/amd/amd-range-analysis.mlir
@@ -165,7 +165,7 @@ module attributes {"ttg.num-warps" = 4 : i32} {
     // expected-remark@+2 {{unsigned : [0, 1023] signed : [0, 1023]}}
     // expected-remark@+1 {{non-neg}}
     %4 = arith.extsi %2 : tensor<1024xi32> to tensor<1024xi64>
-    // expected-remark@+3 {{result 1: unsigned : [0, 130944] signed : [0, 130944]}}
+    // expected-remark@+3 {{result 1: unsigned : [0, 131967] signed : [0, 131967]}}
     // expected-remark@+2 {{result 1: non-neg}}
     // expected-remark@+1 {{inferred total trip count: 128}}
     %5:3 = scf.for %arg2 = %c0 to %c128 step %c1 iter_args(%arg3 = %3, %arg4 = %4, %arg5 = %arg1) -> (!tt.ptr<f32>, tensor<1024xi64>, tensor<1024xf32>) {
@@ -186,7 +186,7 @@ module attributes {"ttg.num-warps" = 4 : i32} {
     // expected-remark@+2 {{unsigned : [0, 1023] signed : [0, 1023]}}
     // expected-remark@+1 {{non-neg}}
     %7 = arith.extsi %2 : tensor<1024xi32> to tensor<1024xi64>
-    // expected-remark@+2 {{unsigned : [0, 131967] signed : [0, 131967]}}
+    // expected-remark@+2 {{unsigned : [0, 132990] signed : [0, 132990]}}
     // expected-remark@+1 {{non-neg}}
     %8 = arith.addi %7, %5#1 : tensor<1024xi64>
     %9 = tt.splat %6 : !tt.ptr<f32> -> tensor<1024x!tt.ptr<f32>>
@@ -216,7 +216,7 @@ module attributes {"ttg.num-warps" = 4 : i32} {
     // expected-remark@+1 {{non-neg}}
     %1 = arith.muli %0, %c1024_i32 : i32
     %2 = tt.make_range {end = 1024 : i32, start = 0 : i32} : tensor<1024xi32>
-    // expected-remark@+3 {{result 1: unsigned : [0, 129921] signed : [0, 129921]}}
+    // expected-remark@+3 {{result 1: unsigned : [0, 130944] signed : [0, 130944]}}
     // expected-remark@+2 {{result 1: non-neg}}
     // expected-remark@+1 {{inferred total trip count: 128}}
     %3:3 = scf.for %arg2 = %c0 to %c128 step %c1 iter_args(%arg3 = %arg0, %arg4 = %cst, %arg5 = %arg1) -> (!tt.ptr<f32>, tensor<1024xi64>, tensor<1024xf32>) {
@@ -237,7 +237,7 @@ module attributes {"ttg.num-warps" = 4 : i32} {
     // expected-remark@+2 {{unsigned : [0, 1023] signed : [0, 1023]}}
     // expected-remark@+1 {{non-neg}}
     %5 = arith.extsi %2 : tensor<1024xi32> to tensor<1024xi64>
-    // expected-remark@+2 {{unsigned : [0, 130944] signed : [0, 130944]}}
+    // expected-remark@+2 {{unsigned : [0, 131967] signed : [0, 131967]}}
     // expected-remark@+1 {{non-neg}}
     %6 = arith.addi %5, %3#1 : tensor<1024xi64>
     %7 = tt.splat %4 : !tt.ptr<f32> -> tensor<1024x!tt.ptr<f32>>
@@ -267,11 +267,11 @@ module attributes {"ttg.num-warps" = 4 : i32} {
     // expected-remark@+1 {{non-neg}}
     %1 = arith.muli %0, %c1024_i32 : i32
     %2 = tt.make_range {end = 1024 : i32, start = 0 : i32} : tensor<1024xi32>
-    // expected-remark@+3 {{result 1: unsigned : [0, 15345] signed : [0, 15345]}}
+    // expected-remark@+3 {{result 1: unsigned : [0, 17391] signed : [0, 17391]}}
     // expected-remark@+2 {{result 1: non-neg}}
     // expected-remark@+1 {{inferred total trip count: 16}}
     %3:3 = scf.for %arg2 = %c0 to %c16 step %c1 iter_args(%arg3 = %arg0, %arg4 = %cst, %arg5 = %arg1) -> (!tt.ptr<f32>, tensor<1024xi64>, tensor<1024xf32>) {
-      // expected-remark@+3 {{result 1: unsigned : [0, 260865] signed : [0, 260865]}}
+      // expected-remark@+3 {{result 1: unsigned : [0, 261888] signed : [0, 261888]}}
       // expected-remark@+2 {{result 1: non-neg}}
       // expected-remark@+1 {{inferred total trip count: 256}}
       %10:3 = scf.for %arg6 = %c0 to %c16 step %c1 iter_args(%arg7 = %arg3, %arg8 = %arg4, %arg9 = %arg5) -> (!tt.ptr<f32>, tensor<1024xi64>, tensor<1024xf32>) {
@@ -294,7 +294,7 @@ module attributes {"ttg.num-warps" = 4 : i32} {
     // expected-remark@+2 {{unsigned : [0, 1023] signed : [0, 1023]}}
     // expected-remark@+1 {{non-neg}}
     %5 = arith.extsi %2 : tensor<1024xi32> to tensor<1024xi64>
-    // expected-remark@+2 {{unsigned : [0, 16368] signed : [0, 16368]}}
+    // expected-remark@+2 {{unsigned : [0, 18414] signed : [0, 18414]}}
     // expected-remark@+1 {{non-neg}}
     %6 = arith.addi %5, %3#1 : tensor<1024xi64>
     %7 = tt.splat %4 : !tt.ptr<f32> -> tensor<1024x!tt.ptr<f32>>
@@ -633,7 +633,7 @@ module attributes {"ttg.num-warps" = 4 : i32} {
     // expected-remark@+2 {{unsigned : [0, 1023] signed : [0, 1023]}}
     // expected-remark@+1 {{non-neg}}
     %3 = arith.extsi %1 : tensor<1024xi32> to tensor<1024xi64>
-    // expected-remark@+3 {{result 1: unsigned : [0, 130944] signed : [0, 130944]}}
+    // expected-remark@+3 {{result 1: unsigned : [0, 131967] signed : [0, 131967]}}
     // expected-remark@+2 {{result 1: non-neg}}
     // expected-remark@+1 {{inferred total trip count: 128}}
     %4:3 = scf.for %arg2 = %c0 to %c128 step %c1 iter_args(%arg3 = %2, %arg4 = %3, %arg5 = %arg1) -> (!tt.ptr<f32>, tensor<1024xi64>, tensor<1024xf32>) {
@@ -658,7 +658,7 @@ module attributes {"ttg.num-warps" = 4 : i32} {
     // expected-remark@+2 {{unsigned : [0, 1023] signed : [0, 1023]}}
     // expected-remark@+1 {{non-neg}}
     %6 = arith.extsi %1 : tensor<1024xi32> to tensor<1024xi64>
-    // expected-remark@+2 {{unsigned : [0, 131967] signed : [0, 131967]}}
+    // expected-remark@+2 {{unsigned : [0, 132990] signed : [0, 132990]}}
     // expected-remark@+1 {{non-neg}}
     %7 = arith.addi %6, %4#1 : tensor<1024xi64>
     %8 = tt.splat %5 : !tt.ptr<f32> -> tensor<1024x!tt.ptr<f32>>
@@ -751,7 +751,7 @@ module attributes {"ttg.num-warps" = 4 : i32} {
     // expected-remark@+2 {{unsigned : [0, 1023] signed : [0, 1023]}}
     // expected-remark@+1 {{non-neg}}
     %6 = arith.extsi %2 : tensor<1024xi32> to tensor<1024xi64>
-    // expected-remark@+5 {{result 1: unsigned : [0, 130944] signed : [0, 130944]}}
+    // expected-remark@+5 {{result 1: unsigned : [0, 131967] signed : [0, 131967]}}
     // expected-remark@+4 {{result 3: unsigned : [0, 130944] signed : [0, 130944]}}
     // expected-remark@+3 {{result 1: non-neg}}
     // expected-remark@+2 {{result 3: non-neg}}
@@ -774,7 +774,7 @@ module attributes {"ttg.num-warps" = 4 : i32} {
     // expected-remark@+2 {{unsigned : [0, 1023] signed : [0, 1023]}}
     // expected-remark@+1 {{non-neg}}
     %9 = arith.extsi %2 : tensor<1024xi32> to tensor<1024xi64>
-    // expected-remark@+2 {{unsigned : [0, 131967] signed : [0, 131967]}}
+    // expected-remark@+2 {{unsigned : [0, 132990] signed : [0, 132990]}}
     // expected-remark@+1 {{non-neg}}
     %10 = arith.addi %9, %7#1 : tensor<1024xi64>
     %11 = tt.splat %8 : !tt.ptr<f32> -> tensor<1024x!tt.ptr<f32>>
@@ -811,8 +811,8 @@ module attributes {"ttg.num-warps" = 4 : i32} {
     // expected-remark@+2 {{unsigned : [0, 1023] signed : [0, 1023]}}
     // expected-remark@+1 {{non-neg}}
     %6 = arith.extsi %2 : tensor<1024xi32> to tensor<1024xi64>
-    // expected-remark@+5 {{result 1: unsigned : [0, 130944] signed : [0, 130944]}}
-    // expected-remark@+4 {{result 4: unsigned : [0, 130944] signed : [0, 130944]}}
+    // expected-remark@+5 {{result 1: unsigned : [0, 131967] signed : [0, 131967]}}
+    // expected-remark@+4 {{result 4: unsigned : [0, 131967] signed : [0, 131967]}}
     // expected-remark@+3 {{result 1: non-neg}}
     // expected-remark@+2 {{result 4: non-neg}}
     // expected-remark@+1 {{inferred total trip count: 128}}
@@ -845,7 +845,7 @@ module attributes {"ttg.num-warps" = 4 : i32} {
     // expected-remark@+2 {{unsigned : [0, 1023] signed : [0, 1023]}}
     // expected-remark@+1 {{non-neg}}
     %9 = arith.extsi %2 : tensor<1024xi32> to tensor<1024xi64>
-    // expected-remark@+2 {{unsigned : [0, 131967] signed : [0, 131967]}}
+    // expected-remark@+2 {{unsigned : [0, 132990] signed : [0, 132990]}}
     // expected-remark@+1 {{non-neg}}
     %10 = arith.addi %9, %7#1 : tensor<1024xi64>
     %11 = tt.splat %8 : !tt.ptr<f32> -> tensor<1024x!tt.ptr<f32>>
@@ -855,7 +855,7 @@ module attributes {"ttg.num-warps" = 4 : i32} {
     // expected-remark@+2 {{unsigned : [0, 1023] signed : [0, 1023]}}
     // expected-remark@+1 {{non-neg}}
     %15 = arith.extsi %2 : tensor<1024xi32> to tensor<1024xi64>
-    // expected-remark@+2 {{unsigned : [0, 131967] signed : [0, 131967]}}
+    // expected-remark@+2 {{unsigned : [0, 132990] signed : [0, 132990]}}
     // expected-remark@+1 {{non-neg}}
     %16 = arith.addi %15, %7#4 : tensor<1024xi64>
     %17 = tt.splat %14 : !tt.ptr<f32> -> tensor<1024x!tt.ptr<f32>>
diff --git a/third_party/amd/lib/Analysis/RangeAnalysis.cpp b/third_party/amd/lib/Analysis/RangeAnalysis.cpp
diff --git a/third_party/amd/lib/TritonAMDGPUToLLVM/DotOpToLLVM/MFMA.cpp b/third_party/amd/lib/TritonAMDGPUToLLVM/DotOpToLLVM/MFMA.cpp
diff --git a/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/TargetInfo.cpp b/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/TargetInfo.cpp

Original file line number	Diff line number	Diff line change
`@@ -1 +1 @@`
`1`		`-2eb709b95d8f521aa15401e159fac0729d56a677`
	`1`	`+ac5dc54d509169d387fcfd495d71853d81c46484`