Skip to content

Commit 552306f

Browse files
authored
1 parent 924ab52 commit 552306f

3 files changed

Lines changed: 4 additions & 3 deletions

File tree

cmake/llvm-hash.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
ac5dc54d509169d387fcfd495d71853d81c46484
1+
979132a02d146ec79e2f046e31877516d7f32d20

python/src/llvm.cc

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,6 @@ createTargetMachine(llvm::Module *module, std::string proc,
133 133
bool disableLLVMOpt = mlir::triton::tools::getBoolEnv("DISABLE_LLVM_OPT");
134 134
if (enable_fp_fusion)
135 135
opt.AllowFPOpFusion = llvm::FPOpFusion::Fast;
136-
opt.NoInfsFPMath = false;
137 136
opt.NoNaNsFPMath = true;
138 137
opt.TrapUnreachable = true;
139 138
opt.MCOptions.AsmVerbose = true;

python/test/unit/language/test_core.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1382,7 +1382,9 @@ def kernel(X, Z):
1382 1382
# atom.add.bf16 is unsupported prior to Hopper so instead we generate an
1383 1383
# atom.cas add loop on Ampere and prior
1384 1384
if dst_type == 'bfloat16' and torch.cuda.get_device_capability()[0] < 9:
1385-
assert f"atom.{sem_str}.gpu.global.cas" in h.asm["ptx"]
1385+
assert "atom.relaxed.gpu.global.cas" in h.asm["ptx"]
1386+
if sem_str != "relaxed":
1387+
assert "fence.acq_rel.gpu" in h.asm["ptx"]
1386 1388
return
1387 1389

1388 1390
assert f"atom.global.gpu.{sem_str}" in h.asm["ptx"]

0 commit comments

Comments
 (0)