Skip to content

Commit dc8d49e

Browse files
antiagainstpaultrojahnamd
authored andcommitted
1 parent 4a63ec9 commit dc8d49e

12 files changed

Lines changed: 243 additions & 69 deletions

File tree

.github/workflows/llvm-build.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@ jobs:
104104
sudo apt-get clean
105105
df -h
106106
echo "Removing large directories"
107+
# deleting 15GB
107108
df -h
108109
109110
- name: Configure, Build, Test, and Install LLVM (Ubuntu and macOS x64)
@@ -214,6 +215,8 @@ jobs:
214215
-DCMAKE_RANLIB="/usr/bin/aarch64-linux-gnu-ranlib" \
215216
-DCMAKE_STRIP="/usr/bin/aarch64-linux-gnu-strip" \
216217
-DCMAKE_SYSROOT=$SYSROOT \
218+
-DLLVM_INCLUDE_TESTS=OFF \
219+
-DMLIR_INCLUDE_TESTS=OFF \
217220
-DLLVM_ENABLE_TERMINFO=OFF \
218221
llvm-project/llvm
219222
ninja -C llvm-project/build install

cmake/llvm-hash.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
2eb709b95d8f521aa15401e159fac0729d56a677
1+
ac5dc54d509169d387fcfd495d71853d81c46484

include/triton/Analysis/Alias.h

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -92,9 +92,8 @@ class SharedMemoryAliasAnalysis
9292

9393
void visitNonControlFlowArguments(
9494
Operation *op, const RegionSuccessor &successor,
95-
ValueRange successorInputs,
96-
ArrayRef<dataflow::Lattice<AliasInfo> *> argLattices,
97-
unsigned firstIndex) override;
95+
ValueRange nonSuccessorInputs,
96+
ArrayRef<dataflow::Lattice<AliasInfo> *> nonSuccessorInputLattices) override;
9897
};
9998

10099
} // namespace mlir

lib/Analysis/Alias.cpp

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -60,13 +60,11 @@ LogicalResult SharedMemoryAliasAnalysis::visitOperation(
6060

6161
void SharedMemoryAliasAnalysis::visitNonControlFlowArguments(
6262
Operation *op, const RegionSuccessor &successor,
63-
ValueRange successorInputs,
64-
ArrayRef<dataflow::Lattice<AliasInfo> *> argLattices, unsigned firstIndex) {
63+
ValueRange nonSuccessorInputs,
64+
ArrayRef<dataflow::Lattice<AliasInfo> *> argLattices) {
6565
auto wsOp = dyn_cast<triton::gpu::WarpSpecializePartitionsOp>(op);
6666
if (!wsOp) {
67-
setAllToEntryStates(argLattices.take_front(firstIndex));
68-
setAllToEntryStates(argLattices.drop_front(
69-
firstIndex + successorInputs.size()));
67+
setAllToEntryStates(argLattices);
7068
return;
7169
}
7270

lib/Analysis/AxisInfo.cpp

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -146,17 +146,14 @@ class AxisInfoAnalysis : public dataflow::SparseForwardDataFlowAnalysis<
146146

147147
void visitNonControlFlowArguments(
148148
Operation *op, const RegionSuccessor &successor,
149-
ValueRange successorInputs,
150-
ArrayRef<dataflow::Lattice<AxisInfo> *> argLattices,
151-
unsigned firstIndex) override {
149+
ValueRange /*nonSuccessorInputs*/,
150+
ArrayRef<dataflow::Lattice<AxisInfo> *> argLattices) override {
152151
if (auto forOp = dyn_cast<scf::ForOp>(op)) {
153152
visitForOpInductionVar(forOp, argLattices);
154153
} else if (auto ws = dyn_cast<gpu::WarpSpecializePartitionsOp>(op)) {
155154
visitWarpSpecializeExplicitCaptures(ws, successor, argLattices);
156155
} else {
157-
setAllToEntryStates(argLattices.take_front(firstIndex));
158-
setAllToEntryStates(
159-
argLattices.drop_front(firstIndex + successorInputs.size()));
156+
setAllToEntryStates(argLattices);
160157
}
161158
}
162159

lib/Target/LLVMIR/LLVMDIUtils.cpp

Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
#include "lib/Target/LLVMIR/LLVMDIUtils.h"
2+
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
3+
#include "mlir/IR/BuiltinAttributes.h"
4+
#include "mlir/IR/BuiltinTypes.h"
5+
#include "mlir/IR/Location.h"
6+
#include "mlir/IR/MLIRContext.h"
7+
#include "mlir/IR/Types.h"
8+
#include "llvm/BinaryFormat/Dwarf.h"
9+
10+
namespace mlir {
11+
12+
// Note: mlir does not provided any built-in conversion from mlir::Type to
13+
// mlir::LLVM::DITypeAttr
14+
LLVM::DITypeAttr LLVMDIUtils::convertType(MLIRContext *context,
15+
mlir::Type type) {
16+
if (type.isInteger(1)) {
17+
return LLVM::DIBasicTypeAttr::get(context, llvm::dwarf::DW_TAG_base_type,
18+
mlir::StringAttr::get(context, "bool"),
19+
type.getIntOrFloatBitWidth(),
20+
llvm::dwarf::DW_ATE_boolean);
21+
}
22+
if (type.isInteger()) {
23+
return LLVM::DIBasicTypeAttr::get(context, llvm::dwarf::DW_TAG_base_type,
24+
mlir::StringAttr::get(context, "int"),
25+
type.getIntOrFloatBitWidth(),
26+
llvm::dwarf::DW_ATE_signed);
27+
} else if (type.isF16()) {
28+
return LLVM::DIBasicTypeAttr::get(context, llvm::dwarf::DW_TAG_base_type,
29+
mlir::StringAttr::get(context, "half"),
30+
type.getIntOrFloatBitWidth(),
31+
llvm::dwarf::DW_ATE_float);
32+
} else if (type.isF32()) {
33+
return LLVM::DIBasicTypeAttr::get(context, llvm::dwarf::DW_TAG_base_type,
34+
mlir::StringAttr::get(context, "float"),
35+
type.getIntOrFloatBitWidth(),
36+
llvm::dwarf::DW_ATE_float);
37+
} else if (type.isF64()) {
38+
return LLVM::DIBasicTypeAttr::get(context, llvm::dwarf::DW_TAG_base_type,
39+
mlir::StringAttr::get(context, "double"),
40+
type.getIntOrFloatBitWidth(),
41+
llvm::dwarf::DW_ATE_float);
42+
} else if (mlir::isa<mlir::VectorType>(type)) {
43+
if (auto vectorTypeSize = calcBitWidth(type); vectorTypeSize.has_value()) {
44+
return LLVM::DIBasicTypeAttr::get(
45+
context, llvm::dwarf::DW_TAG_base_type,
46+
mlir::StringAttr::get(context, "vector"), vectorTypeSize.value(),
47+
llvm::dwarf::DW_ATE_float);
48+
} else {
49+
// TODO: falling back to unknown_type, perhaps theres a better way to
50+
// handle when element type size is not determined
51+
}
52+
}
53+
return LLVM::DIBasicTypeAttr::get(
54+
context, llvm::dwarf::DW_TAG_base_type,
55+
mlir::StringAttr::get(context, "unknown_type"), 0,
56+
llvm::dwarf::DW_ATE_signed);
57+
}
58+
59+
/// Build a DW_TAG_pointer_type debug-info attribute named "pointer".
///
/// \param context     context the attributes are created in.
/// \param pointerType pointer whose address space and size are recorded.
/// \param pointeeType type the pointer refers to. LLVMPointerType carries no
///                    pointee information, so the caller has to supply it.
/// \param datalayout  used to query the size of `pointerType` in bits.
/// \returns a derived type whose base type is `convertType(pointeeType)`.
LLVM::DITypeAttr LLVMDIUtils::convertPtrType(MLIRContext *context,
                                             LLVM::LLVMPointerType pointerType,
                                             mlir::Type pointeeType,
                                             DataLayout datalayout) {
  LLVM::DITypeAttr pointeeDIType = convertType(context, pointeeType);
  unsigned pointerBits = datalayout.getTypeSizeInBits(pointerType);
  unsigned addressSpace = pointerType.getAddressSpace();

  return mlir::LLVM::DIDerivedTypeAttr::get(
      context, llvm::dwarf::DW_TAG_pointer_type,
      mlir::StringAttr::get(context, "pointer"), pointeeDIType, pointerBits,
      /*alignInBits=*/0, /*offset=*/0, addressSpace, mlir::LLVM::DIFlags::Zero,
      /*extra data=*/nullptr);
}
76+
77+
/// Build a DW_TAG_structure_type debug-info attribute named "struct" for a
/// literal, non-packed LLVM struct.
///
/// \param context    context the attributes are created in.
/// \param structType struct to describe; must be neither packed nor
///                   identified (asserted).
/// \param fileAttr   used both as the file and as the scope of the type.
/// \param datalayout used to query the size of `structType` in bits.
/// \param line       source line recorded on the composite type.
/// \returns a composite type whose elements are `convertType` of each struct
///          member, in declaration order. The member types are attached
///          directly; no per-member offsets are recorded here.
LLVM::DITypeAttr LLVMDIUtils::convertStructType(MLIRContext *context,
                                                LLVM::LLVMStructType structType,
                                                LLVM::DIFileAttr fileAttr,
                                                DataLayout datalayout,
                                                int64_t line) {
  assert(!structType.isPacked() && !structType.isIdentified() &&
         "Only accepts NON-Packed and Literal struct type");

  unsigned sizeInBits = datalayout.getTypeSizeInBits(structType);

  // The member index is not needed, so iterate over the body directly.
  SmallVector<LLVM::DINodeAttr> elTypes;
  elTypes.reserve(structType.getBody().size());
  for (mlir::Type element : structType.getBody())
    elTypes.push_back(convertType(context, element));

  return LLVM::DICompositeTypeAttr::get(
      context, llvm::dwarf::DW_TAG_structure_type,
      mlir::StringAttr::get(context, "struct"), fileAttr, /*line=*/line,
      /*scope=*/fileAttr, /*baseType=*/nullptr, mlir::LLVM::DIFlags::Zero,
      sizeInBits, /*alignInBits=*/0, /*dataLocation=*/nullptr, /*rank=*/nullptr,
      /*allocated=*/nullptr, /*associated=*/nullptr, elTypes);
}
100+
101+
/// Build a DW_TAG_array_type debug-info attribute named "array".
///
/// \param context    context the attributes are created in.
/// \param arrayType  LLVM array to describe.
/// \param fileAttr   used both as the file and as the scope of the type.
/// \param datalayout used to query the size of `arrayType` in bits.
/// \param line       source line recorded on the composite type.
/// \returns a composite type whose base type is the converted element type
///          and whose element list repeats that type once per array element.
LLVM::DITypeAttr LLVMDIUtils::convertArrayType(MLIRContext *context,
                                               LLVM::LLVMArrayType arrayType,
                                               LLVM::DIFileAttr fileAttr,
                                               DataLayout datalayout,
                                               int64_t line) {
  unsigned sizeInBits = datalayout.getTypeSizeInBits(arrayType);

  // Convert the element type once and reuse it for both the base type and
  // the element list instead of running the conversion a second time.
  LLVM::DITypeAttr baseType = convertType(context, arrayType.getElementType());
  SmallVector<LLVM::DINodeAttr> elTypes(arrayType.getNumElements(), baseType);

  return LLVM::DICompositeTypeAttr::get(
      context, llvm::dwarf::DW_TAG_array_type,
      mlir::StringAttr::get(context, "array"), fileAttr, /*line=*/line,
      /*scope=*/fileAttr, /*baseType=*/baseType, mlir::LLVM::DIFlags::Zero,
      sizeInBits, /*alignInBits=*/0, /*dataLocation=*/nullptr, /*rank=*/nullptr,
      /*allocated=*/nullptr, /*associated=*/nullptr, elTypes);
}
120+
121+
/// Compute the total width in bits of an integer, float or vector type.
///
/// For a vector the width is the element width multiplied by every extent of
/// the shape; nested vector element types are handled recursively. Scalable
/// dimensions are not consulted, the static extents are used as-is.
///
/// \returns the width, or std::nullopt when `type` is neither int/float nor
///          a vector, when the element width cannot be determined, or when
///          the total does not fit into `unsigned`.
std::optional<unsigned> LLVMDIUtils::calcBitWidth(mlir::Type type) {
  if (type.isIntOrFloat())
    return type.getIntOrFloatBitWidth();

  auto vectorType = dyn_cast<mlir::VectorType>(type);
  if (!vectorType)
    return std::nullopt;

  std::optional<unsigned> elementBits =
      calcBitWidth(vectorType.getElementType());
  if (!elementBits)
    return std::nullopt;

  // Accumulate in 64 bits and reject anything that would not survive the
  // conversion to the `unsigned` return type, rather than silently wrapping.
  constexpr uint64_t maxWidth = ~0u;
  uint64_t totalBits = *elementBits;
  for (int64_t dim : vectorType.getShape()) {
    if (dim < 0)
      return std::nullopt;
    const uint64_t extent = static_cast<uint64_t>(dim);
    if (extent != 0 && totalBits > maxWidth / extent)
      return std::nullopt;
    totalBits *= extent;
  }
  return static_cast<unsigned>(totalBits);
}
142+
143+
/// Attempt to extract a filename for the given loc.
144+
FileLineColLoc LLVMDIUtils::extractFileLoc(Location loc, bool getCaller) {
145+
if (auto fileLoc = dyn_cast<FileLineColLoc>(loc))
146+
return fileLoc;
147+
if (auto nameLoc = dyn_cast<NameLoc>(loc))
148+
return extractFileLoc(nameLoc.getChildLoc());
149+
if (auto opaqueLoc = dyn_cast<OpaqueLoc>(loc))
150+
return extractFileLoc(opaqueLoc.getFallbackLocation());
151+
if (auto fusedLoc = dyn_cast<FusedLoc>(loc))
152+
return extractFileLoc(fusedLoc.getLocations().front());
153+
if (auto callerLoc = dyn_cast<CallSiteLoc>(loc))
154+
return getCaller ? extractFileLoc(callerLoc.getCaller())
155+
: extractFileLoc(callerLoc.getCallee());
156+
StringAttr unknownFile = mlir::StringAttr::get(loc.getContext(), "<unknown>");
157+
return mlir::FileLineColLoc::get(unknownFile, 0, 0);
158+
}
159+
160+
} // namespace mlir

python/test/unit/runtime/test_autotuner.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
import pathlib
88
import uuid
9-
from triton._internal_testing import is_cuda
9+
from triton._internal_testing import is_cuda, is_hip_cdna2
1010

1111

1212
def do_bench(kernel_call, quantiles, use_cuda_graph=False):
@@ -84,6 +84,7 @@ def _kernel(src, N, BLOCK_SIZE: tl.constexpr):
8484
triton.testing.assert_close(src, torch.ones_like(src))
8585

8686

87+
@pytest.mark.skipif(is_hip_cdna2(), reason="Hit LLVM assertion in splitLiveThroughBlock")
8788
def test_hooks(device):
8889
# Autotuner's pre- and post- hooks should be called the same number of times
8990
N = 4096

test/Conversion/amd/mbarrier_ops_to_llvm_gfx1250.mlir

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,10 @@ module attributes {"ttg.target" = "hip:gfx1250", "ttg.num-ctas" = 1 : i32, "ttg.
88
// GFX1250: %[[INIT_VAL1:.+]] = llvm.mlir.constant(4294967297 : i64) : i64
99
// GFX1250: %[[ALLOC_PTR:.+]] = llvm.extractvalue %arg0[0] : !llvm.struct<(ptr<3>, i32)>
1010
// GFX1250: llvm.store %[[INIT_VAL1]], %[[ALLOC_PTR]] : i64, !llvm.ptr<3>
11-
// GFX1250: rocdl.barrier
11+
// GFX1250: llvm.fence syncscope("workgroup") release
12+
// GFX1250: rocdl.s.barrier.signal{{.*}}
13+
// GFX1250: rocdl.s.barrier.wait{{.*}}
14+
// GFX1250: llvm.fence syncscope("workgroup") acquire
1215
amdg.init_barrier %alloc, 2 : !ttg.memdesc<1xi64, #shared, #smem, mutable>
1316
tt.return
1417
}

test/TritonGPU/amd/amd-range-analysis.mlir

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,7 @@ module attributes {"ttg.num-warps" = 4 : i32} {
165165
// expected-remark@+2 {{unsigned : [0, 1023] signed : [0, 1023]}}
166166
// expected-remark@+1 {{non-neg}}
167167
%4 = arith.extsi %2 : tensor<1024xi32> to tensor<1024xi64>
168-
// expected-remark@+3 {{result 1: unsigned : [0, 130944] signed : [0, 130944]}}
168+
// expected-remark@+3 {{result 1: unsigned : [0, 131967] signed : [0, 131967]}}
169169
// expected-remark@+2 {{result 1: non-neg}}
170170
// expected-remark@+1 {{inferred total trip count: 128}}
171171
%5:3 = scf.for %arg2 = %c0 to %c128 step %c1 iter_args(%arg3 = %3, %arg4 = %4, %arg5 = %arg1) -> (!tt.ptr<f32>, tensor<1024xi64>, tensor<1024xf32>) {
@@ -186,7 +186,7 @@ module attributes {"ttg.num-warps" = 4 : i32} {
186186
// expected-remark@+2 {{unsigned : [0, 1023] signed : [0, 1023]}}
187187
// expected-remark@+1 {{non-neg}}
188188
%7 = arith.extsi %2 : tensor<1024xi32> to tensor<1024xi64>
189-
// expected-remark@+2 {{unsigned : [0, 131967] signed : [0, 131967]}}
189+
// expected-remark@+2 {{unsigned : [0, 132990] signed : [0, 132990]}}
190190
// expected-remark@+1 {{non-neg}}
191191
%8 = arith.addi %7, %5#1 : tensor<1024xi64>
192192
%9 = tt.splat %6 : !tt.ptr<f32> -> tensor<1024x!tt.ptr<f32>>
@@ -216,7 +216,7 @@ module attributes {"ttg.num-warps" = 4 : i32} {
216216
// expected-remark@+1 {{non-neg}}
217217
%1 = arith.muli %0, %c1024_i32 : i32
218218
%2 = tt.make_range {end = 1024 : i32, start = 0 : i32} : tensor<1024xi32>
219-
// expected-remark@+3 {{result 1: unsigned : [0, 129921] signed : [0, 129921]}}
219+
// expected-remark@+3 {{result 1: unsigned : [0, 130944] signed : [0, 130944]}}
220220
// expected-remark@+2 {{result 1: non-neg}}
221221
// expected-remark@+1 {{inferred total trip count: 128}}
222222
%3:3 = scf.for %arg2 = %c0 to %c128 step %c1 iter_args(%arg3 = %arg0, %arg4 = %cst, %arg5 = %arg1) -> (!tt.ptr<f32>, tensor<1024xi64>, tensor<1024xf32>) {
@@ -237,7 +237,7 @@ module attributes {"ttg.num-warps" = 4 : i32} {
237237
// expected-remark@+2 {{unsigned : [0, 1023] signed : [0, 1023]}}
238238
// expected-remark@+1 {{non-neg}}
239239
%5 = arith.extsi %2 : tensor<1024xi32> to tensor<1024xi64>
240-
// expected-remark@+2 {{unsigned : [0, 130944] signed : [0, 130944]}}
240+
// expected-remark@+2 {{unsigned : [0, 131967] signed : [0, 131967]}}
241241
// expected-remark@+1 {{non-neg}}
242242
%6 = arith.addi %5, %3#1 : tensor<1024xi64>
243243
%7 = tt.splat %4 : !tt.ptr<f32> -> tensor<1024x!tt.ptr<f32>>
@@ -267,11 +267,11 @@ module attributes {"ttg.num-warps" = 4 : i32} {
267267
// expected-remark@+1 {{non-neg}}
268268
%1 = arith.muli %0, %c1024_i32 : i32
269269
%2 = tt.make_range {end = 1024 : i32, start = 0 : i32} : tensor<1024xi32>
270-
// expected-remark@+3 {{result 1: unsigned : [0, 15345] signed : [0, 15345]}}
270+
// expected-remark@+3 {{result 1: unsigned : [0, 17391] signed : [0, 17391]}}
271271
// expected-remark@+2 {{result 1: non-neg}}
272272
// expected-remark@+1 {{inferred total trip count: 16}}
273273
%3:3 = scf.for %arg2 = %c0 to %c16 step %c1 iter_args(%arg3 = %arg0, %arg4 = %cst, %arg5 = %arg1) -> (!tt.ptr<f32>, tensor<1024xi64>, tensor<1024xf32>) {
274-
// expected-remark@+3 {{result 1: unsigned : [0, 260865] signed : [0, 260865]}}
274+
// expected-remark@+3 {{result 1: unsigned : [0, 261888] signed : [0, 261888]}}
275275
// expected-remark@+2 {{result 1: non-neg}}
276276
// expected-remark@+1 {{inferred total trip count: 256}}
277277
%10:3 = scf.for %arg6 = %c0 to %c16 step %c1 iter_args(%arg7 = %arg3, %arg8 = %arg4, %arg9 = %arg5) -> (!tt.ptr<f32>, tensor<1024xi64>, tensor<1024xf32>) {
@@ -294,7 +294,7 @@ module attributes {"ttg.num-warps" = 4 : i32} {
294294
// expected-remark@+2 {{unsigned : [0, 1023] signed : [0, 1023]}}
295295
// expected-remark@+1 {{non-neg}}
296296
%5 = arith.extsi %2 : tensor<1024xi32> to tensor<1024xi64>
297-
// expected-remark@+2 {{unsigned : [0, 16368] signed : [0, 16368]}}
297+
// expected-remark@+2 {{unsigned : [0, 18414] signed : [0, 18414]}}
298298
// expected-remark@+1 {{non-neg}}
299299
%6 = arith.addi %5, %3#1 : tensor<1024xi64>
300300
%7 = tt.splat %4 : !tt.ptr<f32> -> tensor<1024x!tt.ptr<f32>>
@@ -633,7 +633,7 @@ module attributes {"ttg.num-warps" = 4 : i32} {
633633
// expected-remark@+2 {{unsigned : [0, 1023] signed : [0, 1023]}}
634634
// expected-remark@+1 {{non-neg}}
635635
%3 = arith.extsi %1 : tensor<1024xi32> to tensor<1024xi64>
636-
// expected-remark@+3 {{result 1: unsigned : [0, 130944] signed : [0, 130944]}}
636+
// expected-remark@+3 {{result 1: unsigned : [0, 131967] signed : [0, 131967]}}
637637
// expected-remark@+2 {{result 1: non-neg}}
638638
// expected-remark@+1 {{inferred total trip count: 128}}
639639
%4:3 = scf.for %arg2 = %c0 to %c128 step %c1 iter_args(%arg3 = %2, %arg4 = %3, %arg5 = %arg1) -> (!tt.ptr<f32>, tensor<1024xi64>, tensor<1024xf32>) {
@@ -658,7 +658,7 @@ module attributes {"ttg.num-warps" = 4 : i32} {
658658
// expected-remark@+2 {{unsigned : [0, 1023] signed : [0, 1023]}}
659659
// expected-remark@+1 {{non-neg}}
660660
%6 = arith.extsi %1 : tensor<1024xi32> to tensor<1024xi64>
661-
// expected-remark@+2 {{unsigned : [0, 131967] signed : [0, 131967]}}
661+
// expected-remark@+2 {{unsigned : [0, 132990] signed : [0, 132990]}}
662662
// expected-remark@+1 {{non-neg}}
663663
%7 = arith.addi %6, %4#1 : tensor<1024xi64>
664664
%8 = tt.splat %5 : !tt.ptr<f32> -> tensor<1024x!tt.ptr<f32>>
@@ -751,7 +751,7 @@ module attributes {"ttg.num-warps" = 4 : i32} {
751751
// expected-remark@+2 {{unsigned : [0, 1023] signed : [0, 1023]}}
752752
// expected-remark@+1 {{non-neg}}
753753
%6 = arith.extsi %2 : tensor<1024xi32> to tensor<1024xi64>
754-
// expected-remark@+5 {{result 1: unsigned : [0, 130944] signed : [0, 130944]}}
754+
// expected-remark@+5 {{result 1: unsigned : [0, 131967] signed : [0, 131967]}}
755755
// expected-remark@+4 {{result 3: unsigned : [0, 130944] signed : [0, 130944]}}
756756
// expected-remark@+3 {{result 1: non-neg}}
757757
// expected-remark@+2 {{result 3: non-neg}}
@@ -774,7 +774,7 @@ module attributes {"ttg.num-warps" = 4 : i32} {
774774
// expected-remark@+2 {{unsigned : [0, 1023] signed : [0, 1023]}}
775775
// expected-remark@+1 {{non-neg}}
776776
%9 = arith.extsi %2 : tensor<1024xi32> to tensor<1024xi64>
777-
// expected-remark@+2 {{unsigned : [0, 131967] signed : [0, 131967]}}
777+
// expected-remark@+2 {{unsigned : [0, 132990] signed : [0, 132990]}}
778778
// expected-remark@+1 {{non-neg}}
779779
%10 = arith.addi %9, %7#1 : tensor<1024xi64>
780780
%11 = tt.splat %8 : !tt.ptr<f32> -> tensor<1024x!tt.ptr<f32>>
@@ -811,8 +811,8 @@ module attributes {"ttg.num-warps" = 4 : i32} {
811811
// expected-remark@+2 {{unsigned : [0, 1023] signed : [0, 1023]}}
812812
// expected-remark@+1 {{non-neg}}
813813
%6 = arith.extsi %2 : tensor<1024xi32> to tensor<1024xi64>
814-
// expected-remark@+5 {{result 1: unsigned : [0, 130944] signed : [0, 130944]}}
815-
// expected-remark@+4 {{result 4: unsigned : [0, 130944] signed : [0, 130944]}}
814+
// expected-remark@+5 {{result 1: unsigned : [0, 131967] signed : [0, 131967]}}
815+
// expected-remark@+4 {{result 4: unsigned : [0, 131967] signed : [0, 131967]}}
816816
// expected-remark@+3 {{result 1: non-neg}}
817817
// expected-remark@+2 {{result 4: non-neg}}
818818
// expected-remark@+1 {{inferred total trip count: 128}}
@@ -845,7 +845,7 @@ module attributes {"ttg.num-warps" = 4 : i32} {
845845
// expected-remark@+2 {{unsigned : [0, 1023] signed : [0, 1023]}}
846846
// expected-remark@+1 {{non-neg}}
847847
%9 = arith.extsi %2 : tensor<1024xi32> to tensor<1024xi64>
848-
// expected-remark@+2 {{unsigned : [0, 131967] signed : [0, 131967]}}
848+
// expected-remark@+2 {{unsigned : [0, 132990] signed : [0, 132990]}}
849849
// expected-remark@+1 {{non-neg}}
850850
%10 = arith.addi %9, %7#1 : tensor<1024xi64>
851851
%11 = tt.splat %8 : !tt.ptr<f32> -> tensor<1024x!tt.ptr<f32>>
@@ -855,7 +855,7 @@ module attributes {"ttg.num-warps" = 4 : i32} {
855855
// expected-remark@+2 {{unsigned : [0, 1023] signed : [0, 1023]}}
856856
// expected-remark@+1 {{non-neg}}
857857
%15 = arith.extsi %2 : tensor<1024xi32> to tensor<1024xi64>
858-
// expected-remark@+2 {{unsigned : [0, 131967] signed : [0, 131967]}}
858+
// expected-remark@+2 {{unsigned : [0, 132990] signed : [0, 132990]}}
859859
// expected-remark@+1 {{non-neg}}
860860
%16 = arith.addi %15, %7#4 : tensor<1024xi64>
861861
%17 = tt.splat %14 : !tt.ptr<f32> -> tensor<1024x!tt.ptr<f32>>

0 commit comments

Comments
 (0)