Skip to content

Commit a94df63

Browse files
AlexAUTpaultrojahnamd
authored and committed
1 parent dc8d49e commit a94df63

3 files changed

Lines changed: 4 additions & 3 deletions

File tree

cmake/llvm-hash.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
ac5dc54d509169d387fcfd495d71853d81c46484
1+
979132a02d146ec79e2f046e31877516d7f32d20

python/src/llvm.cc

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,6 @@ createTargetMachine(llvm::Module *module, std::string proc,
5757
bool disableLLVMOpt = mlir::triton::tools::getBoolEnv("DISABLE_LLVM_OPT");
5858
if (enable_fp_fusion)
5959
opt.AllowFPOpFusion = llvm::FPOpFusion::Fast;
60-
opt.NoInfsFPMath = false;
6160
opt.NoNaNsFPMath = true;
6261
opt.TrapUnreachable = true;
6362
opt.MCOptions.AsmVerbose = true;

python/test/unit/language/test_core.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1380,7 +1380,9 @@ def kernel(X, Z):
13801380
# atom.add.bf16 is unsupported prior to Hopper so instead we generate an
13811381
# atom.cas add loop on Ampere and prior
13821382
if dst_type == 'bfloat16' and torch.cuda.get_device_capability()[0] < 9:
1383-
assert f"atom.{sem_str}.gpu.global.cas" in h.asm["ptx"]
1383+
assert "atom.relaxed.gpu.global.cas" in h.asm["ptx"]
1384+
if sem_str != "relaxed":
1385+
assert "fence.acq_rel.gpu" in h.asm["ptx"]
13841386
return
13851387

13861388
assert f"atom.global.gpu.{sem_str}" in h.asm["ptx"]

0 commit comments

Comments
 (0)