triton-lang · ThomasRaoux · Feb 12, 2026 · Feb 6, 2026 · Feb 11, 2026 · Feb 11, 2026
@@ -1 +1 @@
-ac5dc54d509169d387fcfd495d71853d81c46484
+979132a02d146ec79e2f046e31877516d7f32d20
@@ -133,7 +133,6 @@ createTargetMachine(llvm::Module *module, std::string proc,
   bool disableLLVMOpt = mlir::triton::tools::getBoolEnv("DISABLE_LLVM_OPT");
   if (enable_fp_fusion)
     opt.AllowFPOpFusion = llvm::FPOpFusion::Fast;
-  opt.NoInfsFPMath = false;
   opt.NoNaNsFPMath = true;
   opt.TrapUnreachable = true;
   opt.MCOptions.AsmVerbose = true;

@@ -1382,7 +1382,9 @@ def kernel(X, Z):
     # atom.add.bf16 is unsupported prior to Hopper so instead we generate an
     # atom.cas add loop on Ampere and prior
     if dst_type == 'bfloat16' and torch.cuda.get_device_capability()[0] < 9:
-        assert f"atom.{sem_str}.gpu.global.cas" in h.asm["ptx"]
+        assert "atom.relaxed.gpu.global.cas" in h.asm["ptx"]
+        if sem_str != "relaxed":
+            assert "fence.acq_rel.gpu" in h.asm["ptx"]
         return
 
     assert f"atom.global.gpu.{sem_str}" in h.asm["ptx"]
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		ac5dc54d509169d387fcfd495d71853d81c46484
		979132a02d146ec79e2f046e31877516d7f32d20