Skip to content

Commit cd6dd7b

Browse files
committed
AMDGPU: Drop and upgrade llvm.amdgcn.atomic.csub/cond.sub to atomicrmw
1 parent 2f415bc commit cd6dd7b

32 files changed

+2277
-653
lines changed

Diff for: llvm/docs/AMDGPUUsage.rst

-5
Original file line numberDiff line numberDiff line change
@@ -1358,11 +1358,6 @@ The AMDGPU backend implements the following LLVM IR intrinsics.
13581358

13591359
The iglp_opt strategy implementations are subject to change.
13601360

1361-
llvm.amdgcn.atomic.cond.sub.u32 Provides direct access to flat_atomic_cond_sub_u32, global_atomic_cond_sub_u32
1362-
and ds_cond_sub_u32 based on address space on gfx12 targets. This
1363-
performs subtraction only if the memory value is greater than or
1364-
equal to the data value.
1365-
13661361
llvm.amdgcn.s.getpc Provides access to the s_getpc_b64 instruction, but with the return value
13671362
sign-extended from the width of the underlying PC hardware register even on
13681363
processors where the s_getpc_b64 instruction returns a zero-extended value.

Diff for: llvm/docs/ReleaseNotes.rst

+4
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,10 @@ Changes to the AArch64 Backend
7777
Changes to the AMDGPU Backend
7878
-----------------------------
7979

80+
* Removed ``llvm.amdgcn.atomic.cond.sub.u32`` and
81+
``llvm.amdgcn.global.atomic.csub`` intrinsics. :ref:`atomicrmw <i_atomicrmw>`
82+
should be used instead with ``usub_cond`` and ``usub_sat``, respectively.
83+
8084
Changes to the ARM Backend
8185
--------------------------
8286

Diff for: llvm/include/llvm/IR/IntrinsicsAMDGPU.td

-8
Original file line numberDiff line numberDiff line change
@@ -1353,7 +1353,6 @@ def int_amdgcn_raw_buffer_atomic_or : AMDGPURawBufferAtomic;
13531353
def int_amdgcn_raw_buffer_atomic_xor : AMDGPURawBufferAtomic;
13541354
def int_amdgcn_raw_buffer_atomic_inc : AMDGPURawBufferAtomic;
13551355
def int_amdgcn_raw_buffer_atomic_dec : AMDGPURawBufferAtomic;
1356-
def int_amdgcn_raw_buffer_atomic_cond_sub_u32 : AMDGPURawBufferAtomic;
13571356
def int_amdgcn_raw_buffer_atomic_cmpswap : Intrinsic<
13581357
[llvm_anyint_ty],
13591358
[LLVMMatchType<0>, // src(VGPR)
@@ -1390,7 +1389,6 @@ def int_amdgcn_raw_ptr_buffer_atomic_or : AMDGPURawPtrBufferAtomic;
13901389
def int_amdgcn_raw_ptr_buffer_atomic_xor : AMDGPURawPtrBufferAtomic;
13911390
def int_amdgcn_raw_ptr_buffer_atomic_inc : AMDGPURawPtrBufferAtomic;
13921391
def int_amdgcn_raw_ptr_buffer_atomic_dec : AMDGPURawPtrBufferAtomic;
1393-
def int_amdgcn_raw_ptr_buffer_atomic_cond_sub_u32 : AMDGPURawPtrBufferAtomic;
13941392
def int_amdgcn_raw_ptr_buffer_atomic_cmpswap : Intrinsic<
13951393
[llvm_anyint_ty],
13961394
[LLVMMatchType<0>, // src(VGPR)
@@ -1431,7 +1429,6 @@ def int_amdgcn_struct_buffer_atomic_or : AMDGPUStructBufferAtomic;
14311429
def int_amdgcn_struct_buffer_atomic_xor : AMDGPUStructBufferAtomic;
14321430
def int_amdgcn_struct_buffer_atomic_inc : AMDGPUStructBufferAtomic;
14331431
def int_amdgcn_struct_buffer_atomic_dec : AMDGPUStructBufferAtomic;
1434-
def int_amdgcn_struct_buffer_atomic_cond_sub_u32 : AMDGPUStructBufferAtomic;
14351432
def int_amdgcn_struct_buffer_atomic_cmpswap : Intrinsic<
14361433
[llvm_anyint_ty],
14371434
[LLVMMatchType<0>, // src(VGPR)
@@ -1467,7 +1464,6 @@ def int_amdgcn_struct_ptr_buffer_atomic_or : AMDGPUStructPtrBufferAtomic;
14671464
def int_amdgcn_struct_ptr_buffer_atomic_xor : AMDGPUStructPtrBufferAtomic;
14681465
def int_amdgcn_struct_ptr_buffer_atomic_inc : AMDGPUStructPtrBufferAtomic;
14691466
def int_amdgcn_struct_ptr_buffer_atomic_dec : AMDGPUStructPtrBufferAtomic;
1470-
def int_amdgcn_struct_ptr_buffer_atomic_cond_sub_u32 : AMDGPUStructPtrBufferAtomic;
14711467
def int_amdgcn_struct_ptr_buffer_atomic_cmpswap : Intrinsic<
14721468
[llvm_anyint_ty],
14731469
[LLVMMatchType<0>, // src(VGPR)
@@ -2463,8 +2459,6 @@ class AMDGPUAtomicRtn<LLVMType vt, LLVMType pt = llvm_anyptr_ty> : Intrinsic <
24632459
[IntrArgMemOnly, IntrWillReturn, NoCapture<ArgIndex<0>>, IntrNoCallback, IntrNoFree], "",
24642460
[SDNPMemOperand]>;
24652461

2466-
def int_amdgcn_global_atomic_csub : AMDGPUAtomicRtn<llvm_i32_ty>;
2467-
24682462
// uint4 llvm.amdgcn.image.bvh.intersect.ray <node_ptr>, <ray_extent>, <ray_origin>,
24692463
// <ray_dir>, <ray_inv_dir>, <texture_descr>
24702464
// <node_ptr> is i32 or i64.
@@ -2664,8 +2658,6 @@ def int_amdgcn_flat_atomic_fmax_num : AMDGPUAtomicRtn<llvm_anyfloat_ty>;
26642658
def int_amdgcn_global_atomic_fmin_num : AMDGPUAtomicRtn<llvm_anyfloat_ty>;
26652659
def int_amdgcn_global_atomic_fmax_num : AMDGPUAtomicRtn<llvm_anyfloat_ty>;
26662660

2667-
def int_amdgcn_atomic_cond_sub_u32 : AMDGPUAtomicRtn<llvm_i32_ty>;
2668-
26692661
class AMDGPULoadIntrinsic<LLVMType ptr_ty>:
26702662
Intrinsic<
26712663
[llvm_any_ty],

Diff for: llvm/lib/IR/AutoUpgrade.cpp

+7-4
Original file line numberDiff line numberDiff line change
@@ -1024,9 +1024,10 @@ static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,
10241024
}
10251025

10261026
if (Name.consume_front("atomic.")) {
1027-
if (Name.starts_with("inc") || Name.starts_with("dec")) {
1028-
// These were replaced with atomicrmw uinc_wrap and udec_wrap, so
1029-
// there's no new declaration.
1027+
if (Name.starts_with("inc") || Name.starts_with("dec") ||
1028+
Name.starts_with("cond.sub") || Name.starts_with("csub")) {
1029+
// These were replaced with atomicrmw uinc_wrap, udec_wrap, usub_cond
1030+
// and usub_sat so there's no new declaration.
10301031
NewFn = nullptr;
10311032
return true;
10321033
}
@@ -4046,7 +4047,9 @@ static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI,
40464047
.StartsWith("atomic.inc.", AtomicRMWInst::UIncWrap)
40474048
.StartsWith("atomic.dec.", AtomicRMWInst::UDecWrap)
40484049
.StartsWith("global.atomic.fadd", AtomicRMWInst::FAdd)
4049-
.StartsWith("flat.atomic.fadd", AtomicRMWInst::FAdd);
4050+
.StartsWith("flat.atomic.fadd", AtomicRMWInst::FAdd)
4051+
.StartsWith("atomic.cond.sub", AtomicRMWInst::USubCond)
4052+
.StartsWith("atomic.csub", AtomicRMWInst::USubSat);
40504053

40514054
unsigned NumOperands = CI->getNumOperands();
40524055
if (NumOperands < 3) // Malformed bitcode.

Diff for: llvm/lib/Target/AMDGPU/AMDGPUGISel.td

+2
Original file line numberDiff line numberDiff line change
@@ -271,6 +271,8 @@ def : GINodeEquiv<G_AMDGPU_TBUFFER_STORE_FORMAT_D16, SItbuffer_store_d16>;
271271
// FIXME: Check MMO is atomic
272272
def : GINodeEquiv<G_ATOMICRMW_UINC_WRAP, atomic_load_uinc_wrap_glue>;
273273
def : GINodeEquiv<G_ATOMICRMW_UDEC_WRAP, atomic_load_udec_wrap_glue>;
274+
def : GINodeEquiv<G_ATOMICRMW_USUB_COND, atomic_load_usub_cond_glue>;
275+
def : GINodeEquiv<G_ATOMICRMW_USUB_SAT, atomic_load_usub_sat_glue>;
274276
def : GINodeEquiv<G_ATOMICRMW_FMIN, atomic_load_fmin_glue>;
275277
def : GINodeEquiv<G_ATOMICRMW_FMAX, atomic_load_fmax_glue>;
276278

Diff for: llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -3537,6 +3537,8 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) {
35373537
case TargetOpcode::G_ATOMICRMW_UMAX:
35383538
case TargetOpcode::G_ATOMICRMW_UINC_WRAP:
35393539
case TargetOpcode::G_ATOMICRMW_UDEC_WRAP:
3540+
case TargetOpcode::G_ATOMICRMW_USUB_COND:
3541+
case TargetOpcode::G_ATOMICRMW_USUB_SAT:
35403542
case TargetOpcode::G_ATOMICRMW_FADD:
35413543
case TargetOpcode::G_ATOMICRMW_FMIN:
35423544
case TargetOpcode::G_ATOMICRMW_FMAX:

Diff for: llvm/lib/Target/AMDGPU/AMDGPUInstructions.td

+2-4
Original file line numberDiff line numberDiff line change
@@ -626,16 +626,12 @@ defm int_amdgcn_global_atomic_fadd : global_addr_space_atomic_op;
626626
defm int_amdgcn_flat_atomic_fadd : global_addr_space_atomic_op;
627627
defm int_amdgcn_global_atomic_fmin : noret_op;
628628
defm int_amdgcn_global_atomic_fmax : noret_op;
629-
defm int_amdgcn_global_atomic_csub : noret_op;
630629
defm int_amdgcn_flat_atomic_fadd : local_addr_space_atomic_op;
631630
defm int_amdgcn_global_atomic_ordered_add_b64 : noret_op;
632631
defm int_amdgcn_flat_atomic_fmin_num : noret_op;
633632
defm int_amdgcn_flat_atomic_fmax_num : noret_op;
634633
defm int_amdgcn_global_atomic_fmin_num : noret_op;
635634
defm int_amdgcn_global_atomic_fmax_num : noret_op;
636-
defm int_amdgcn_atomic_cond_sub_u32 : local_addr_space_atomic_op;
637-
defm int_amdgcn_atomic_cond_sub_u32 : flat_addr_space_atomic_op;
638-
defm int_amdgcn_atomic_cond_sub_u32 : global_addr_space_atomic_op;
639635

640636
multiclass noret_binary_atomic_op<SDNode atomic_op> {
641637
let HasNoUse = true in
@@ -686,6 +682,8 @@ defm atomic_load_fmin : binary_atomic_op_fp_all_as<atomic_load_fmin>;
686682
defm atomic_load_fmax : binary_atomic_op_fp_all_as<atomic_load_fmax>;
687683
defm atomic_load_uinc_wrap : binary_atomic_op_all_as<atomic_load_uinc_wrap>;
688684
defm atomic_load_udec_wrap : binary_atomic_op_all_as<atomic_load_udec_wrap>;
685+
defm atomic_load_usub_cond : binary_atomic_op_all_as<atomic_load_usub_cond>;
686+
defm atomic_load_usub_sat : binary_atomic_op_all_as<atomic_load_usub_sat>;
689687
defm AMDGPUatomic_cmp_swap : binary_atomic_op_all_as<AMDGPUatomic_cmp_swap>;
690688

691689
def load_align8_local : PatFrag<(ops node:$ptr), (load_local node:$ptr)>,

Diff for: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

+7-3
Original file line numberDiff line numberDiff line change
@@ -1647,6 +1647,13 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
16471647
Atomics.legalFor({{S32, FlatPtr}, {S64, FlatPtr}});
16481648
}
16491649

1650+
auto &Atomics32 =
1651+
getActionDefinitionsBuilder({G_ATOMICRMW_USUB_COND, G_ATOMICRMW_USUB_SAT})
1652+
.legalFor({{S32, GlobalPtr}, {S32, LocalPtr}, {S32, RegionPtr}});
1653+
if (ST.hasFlatAddressSpace()) {
1654+
Atomics32.legalFor({{S32, FlatPtr}});
1655+
}
1656+
16501657
// TODO: v2bf16 operations, and fat buffer pointer support.
16511658
auto &Atomic = getActionDefinitionsBuilder(G_ATOMICRMW_FADD);
16521659
if (ST.hasLDSFPAtomicAddF32()) {
@@ -6152,9 +6159,6 @@ static unsigned getBufferAtomicPseudo(Intrinsic::ID IntrID) {
61526159
case Intrinsic::amdgcn_struct_buffer_atomic_fmax:
61536160
case Intrinsic::amdgcn_struct_ptr_buffer_atomic_fmax:
61546161
return AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX;
6155-
case Intrinsic::amdgcn_raw_buffer_atomic_cond_sub_u32:
6156-
case Intrinsic::amdgcn_struct_buffer_atomic_cond_sub_u32:
6157-
return AMDGPU::G_AMDGPU_BUFFER_ATOMIC_COND_SUB_U32;
61586162
default:
61596163
llvm_unreachable("unhandled atomic opcode");
61606164
}

Diff for: llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp

+9-1
Original file line numberDiff line numberDiff line change
@@ -1150,7 +1150,15 @@ Value *SplitPtrStructs::handleMemoryInst(Instruction *I, Value *Arg, Value *Ptr,
11501150
case AtomicRMWInst::UIncWrap:
11511151
case AtomicRMWInst::UDecWrap:
11521152
report_fatal_error("wrapping increment/decrement not supported for "
1153-
"buffer resources and should've ben expanded away");
1153+
"buffer resources and should've been expanded away");
1154+
break;
1155+
case AtomicRMWInst::USubCond:
1156+
report_fatal_error("conditional subtract not supported for buffer "
1157+
"resources and should've been expanded away");
1158+
break;
1159+
case AtomicRMWInst::USubSat:
1160+
report_fatal_error("subtract with clamp not supported for buffer "
1161+
"resources and should've been expanded away");
11541162
break;
11551163
case AtomicRMWInst::BAD_BINOP:
11561164
llvm_unreachable("Not sure how we got a bad binop");

Diff for: llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -4897,7 +4897,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
48974897
break;
48984898
}
48994899
case Intrinsic::amdgcn_global_atomic_fadd:
4900-
case Intrinsic::amdgcn_global_atomic_csub:
49014900
case Intrinsic::amdgcn_global_atomic_fmin:
49024901
case Intrinsic::amdgcn_global_atomic_fmax:
49034902
case Intrinsic::amdgcn_global_atomic_fmin_num:
@@ -4907,7 +4906,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
49074906
case Intrinsic::amdgcn_flat_atomic_fmax:
49084907
case Intrinsic::amdgcn_flat_atomic_fmin_num:
49094908
case Intrinsic::amdgcn_flat_atomic_fmax_num:
4910-
case Intrinsic::amdgcn_atomic_cond_sub_u32:
49114909
case Intrinsic::amdgcn_global_atomic_ordered_add_b64:
49124910
case Intrinsic::amdgcn_global_load_tr_b64:
49134911
case Intrinsic::amdgcn_global_load_tr_b128:
@@ -5234,6 +5232,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
52345232
case AMDGPU::G_ATOMICRMW_FMAX:
52355233
case AMDGPU::G_ATOMICRMW_UINC_WRAP:
52365234
case AMDGPU::G_ATOMICRMW_UDEC_WRAP:
5235+
case AMDGPU::G_ATOMICRMW_USUB_COND:
5236+
case AMDGPU::G_ATOMICRMW_USUB_SAT:
52375237
case AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG: {
52385238
OpdsMapping[0] = getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
52395239
OpdsMapping[1] = getValueMappingForPtr(MRI, MI.getOperand(1).getReg());

Diff for: llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td

-6
Original file line numberDiff line numberDiff line change
@@ -237,8 +237,6 @@ def : SourceOfDivergence<int_amdgcn_mbcnt_lo>;
237237
def : SourceOfDivergence<int_r600_read_tidig_x>;
238238
def : SourceOfDivergence<int_r600_read_tidig_y>;
239239
def : SourceOfDivergence<int_r600_read_tidig_z>;
240-
def : SourceOfDivergence<int_amdgcn_atomic_cond_sub_u32>;
241-
def : SourceOfDivergence<int_amdgcn_global_atomic_csub>;
242240
def : SourceOfDivergence<int_amdgcn_global_atomic_fadd>;
243241
def : SourceOfDivergence<int_amdgcn_global_atomic_fmin>;
244242
def : SourceOfDivergence<int_amdgcn_global_atomic_fmax>;
@@ -266,7 +264,6 @@ def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_fadd>;
266264
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_fmin>;
267265
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_fmax>;
268266
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_cmpswap>;
269-
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_cond_sub_u32>;
270267
def : SourceOfDivergence<int_amdgcn_raw_ptr_buffer_atomic_swap>;
271268
def : SourceOfDivergence<int_amdgcn_raw_ptr_buffer_atomic_add>;
272269
def : SourceOfDivergence<int_amdgcn_raw_ptr_buffer_atomic_sub>;
@@ -283,7 +280,6 @@ def : SourceOfDivergence<int_amdgcn_raw_ptr_buffer_atomic_fadd>;
283280
def : SourceOfDivergence<int_amdgcn_raw_ptr_buffer_atomic_fmin>;
284281
def : SourceOfDivergence<int_amdgcn_raw_ptr_buffer_atomic_fmax>;
285282
def : SourceOfDivergence<int_amdgcn_raw_ptr_buffer_atomic_cmpswap>;
286-
def : SourceOfDivergence<int_amdgcn_raw_ptr_buffer_atomic_cond_sub_u32>;
287283
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_swap>;
288284
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_add>;
289285
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_sub>;
@@ -300,7 +296,6 @@ def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_fadd>;
300296
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_fmin>;
301297
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_fmax>;
302298
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_cmpswap>;
303-
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_cond_sub_u32>;
304299
def : SourceOfDivergence<int_amdgcn_struct_ptr_buffer_atomic_swap>;
305300
def : SourceOfDivergence<int_amdgcn_struct_ptr_buffer_atomic_add>;
306301
def : SourceOfDivergence<int_amdgcn_struct_ptr_buffer_atomic_sub>;
@@ -317,7 +312,6 @@ def : SourceOfDivergence<int_amdgcn_struct_ptr_buffer_atomic_fadd>;
317312
def : SourceOfDivergence<int_amdgcn_struct_ptr_buffer_atomic_fmin>;
318313
def : SourceOfDivergence<int_amdgcn_struct_ptr_buffer_atomic_fmax>;
319314
def : SourceOfDivergence<int_amdgcn_struct_ptr_buffer_atomic_cmpswap>;
320-
def : SourceOfDivergence<int_amdgcn_struct_ptr_buffer_atomic_cond_sub_u32>;
321315
def : SourceOfDivergence<int_amdgcn_ps_live>;
322316
def : SourceOfDivergence<int_amdgcn_live_mask>;
323317
def : SourceOfDivergence<int_amdgcn_ds_swizzle>;

Diff for: llvm/lib/Target/AMDGPU/BUFInstructions.td

+1-1
Original file line numberDiff line numberDiff line change
@@ -1128,7 +1128,7 @@ defm BUFFER_ATOMIC_DEC_X2 : MUBUF_Pseudo_Atomics <
11281128

11291129
let OtherPredicates = [HasGFX10_BEncoding] in {
11301130
defm BUFFER_ATOMIC_CSUB : MUBUF_Pseudo_Atomics <
1131-
"buffer_atomic_csub", VGPR_32, i32, int_amdgcn_global_atomic_csub
1131+
"buffer_atomic_csub", VGPR_32, i32
11321132
>;
11331133
}
11341134

Diff for: llvm/lib/Target/AMDGPU/DSInstructions.td

+35-11
Original file line numberDiff line numberDiff line change
@@ -734,17 +734,6 @@ defm DS_COND_SUB_RTN_U32 : DS_1A1D_RET_mc<"ds_cond_sub_rtn_u32", VGPR_32>;
734734
defm DS_SUB_CLAMP_U32 : DS_1A1D_NORET_mc<"ds_sub_clamp_u32">;
735735
defm DS_SUB_CLAMP_RTN_U32 : DS_1A1D_RET_mc<"ds_sub_clamp_rtn_u32", VGPR_32>;
736736

737-
multiclass DSAtomicRetNoRetPatIntrinsic_mc<DS_Pseudo inst, DS_Pseudo noRetInst,
738-
ValueType vt, string frag> {
739-
def : DSAtomicRetPat<inst, vt,
740-
!cast<PatFrag>(frag#"_local_addrspace")>;
741-
742-
let OtherPredicates = [HasAtomicCSubNoRtnInsts] in
743-
def : DSAtomicRetPat<noRetInst, vt,
744-
!cast<PatFrag>(frag#"_noret_local_addrspace"), /* complexity */ 1>;
745-
}
746-
747-
defm : DSAtomicRetNoRetPatIntrinsic_mc<DS_COND_SUB_RTN_U32, DS_COND_SUB_U32, i32, "int_amdgcn_atomic_cond_sub_u32">;
748737
} // let SubtargetPredicate = isGFX12Plus
749738

750739
//===----------------------------------------------------------------------===//
@@ -1006,7 +995,34 @@ multiclass DSAtomicRetNoRetPat_mc<DS_Pseudo inst, DS_Pseudo noRetInst,
1006995
}
1007996
}
1008997

998+
// Selection patterns for a DS atomic whose no-return form is additionally
// gated on HasAtomicCSubNoRtnInsts.
//
//   inst      - returning DS pseudo.
//   noRetInst - no-return DS pseudo.
//   vt        - value type; also used as the suffix of the PatFrag names.
//   frag      - PatFrag name prefix (e.g. "atomic_load_usub_cond").
//
// Three groups of patterns are emitted, one returning and one no-return
// pattern each:
//   * LDSRequiresM0Init:    "_local_m0_" fragments, plain pseudos.
//   * NotLDSRequiresM0Init: "_local_" fragments, "_gfx9"-suffixed pseudos.
//   * HasGDS:               "_region_m0_" fragments, plain pseudos, gds = 1.
// The no-return patterns use complexity 1; the returning patterns keep the
// default complexity (passed explicitly as 0 in the HasGDS group).
//
// A nested `let` of the same field replaces the outer value instead of
// appending to it, so every no-return pattern lists its complete
// OtherPredicates (group predicate + HasAtomicCSubNoRtnInsts) explicitly.
multiclass DSAtomicRetNoRetPatCondSub_mc<DS_Pseudo inst, DS_Pseudo noRetInst,
                                         ValueType vt, string frag> {
  // LDS, subtargets that need M0 initialized.
  let OtherPredicates = [LDSRequiresM0Init] in
  def : DSAtomicRetPat<inst, vt,
                       !cast<PatFrag>(frag#"_local_m0_"#vt)>;

  let OtherPredicates = [LDSRequiresM0Init, HasAtomicCSubNoRtnInsts] in
  def : DSAtomicRetPat<noRetInst, vt,
                       !cast<PatFrag>(frag#"_local_m0_noret_"#vt),
                       /* complexity */ 1>;

  // LDS, subtargets that do not need M0 initialized: use the "_gfx9" pseudos.
  let OtherPredicates = [NotLDSRequiresM0Init] in
  def : DSAtomicRetPat<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt,
                       !cast<PatFrag>(frag#"_local_"#vt)>;

  let OtherPredicates = [NotLDSRequiresM0Init, HasAtomicCSubNoRtnInsts] in
  def : DSAtomicRetPat<!cast<DS_Pseudo>(!cast<string>(noRetInst)#"_gfx9"), vt,
                       !cast<PatFrag>(frag#"_local_noret_"#vt),
                       /* complexity */ 1>;

  // Region address space (GDS).
  let OtherPredicates = [HasGDS] in
  def : DSAtomicRetPat<inst, vt,
                       !cast<PatFrag>(frag#"_region_m0_"#vt),
                       /* complexity */ 0, /* gds */ 1>;

  let OtherPredicates = [HasGDS, HasAtomicCSubNoRtnInsts] in
  def : DSAtomicRetPat<noRetInst, vt,
                       !cast<PatFrag>(frag#"_region_m0_noret_"#vt),
                       /* complexity */ 1, /* gds */ 1>;
}
10101026

10111027
let SubtargetPredicate = isGFX6GFX7GFX8GFX9GFX10 in {
10121028
// Caution, the order of src and cmp is the *opposite* of the BUFFER_ATOMIC_CMPSWAP opcode.
@@ -1089,6 +1105,14 @@ defm : DSAtomicRetNoRetPat_mc<DS_PK_ADD_RTN_F16, DS_PK_ADD_F16, v2f16, "atomic_l
10891105
defm : DSAtomicRetNoRetPat_mc<DS_PK_ADD_RTN_BF16, DS_PK_ADD_BF16, v2bf16, "atomic_load_fadd">;
10901106
}
10911107

1108+
let SubtargetPredicate = isGFX12Plus in {
1109+
1110+
defm : DSAtomicRetNoRetPatCondSub_mc<DS_COND_SUB_RTN_U32, DS_COND_SUB_U32, i32, "atomic_load_usub_cond">;
1111+
1112+
defm : DSAtomicRetNoRetPat_mc<DS_SUB_CLAMP_RTN_U32, DS_SUB_CLAMP_U32, i32, "atomic_load_usub_sat">;
1113+
1114+
} // let SubtargetPredicate = isGFX12Plus
1115+
10921116
let SubtargetPredicate = isGFX6GFX7GFX8GFX9GFX10 in {
10931117
defm : DSAtomicCmpXChgSwapped_mc<DS_CMPST_RTN_B32, DS_CMPST_B32, i32, "atomic_cmp_swap">;
10941118
}

0 commit comments

Comments
 (0)