Skip to content

Commit 6dd6d82

Browse files
authored
[CIR][CIRGen] Support for builtin __atomic_thread_fence (#1287)
Fix #1274. Implements the atomic thread fence synchronization primitive, represented in CIR by the `cir.atomic.fence` operation.
1 parent e342308 commit 6dd6d82

File tree

5 files changed

+244
-1
lines changed

5 files changed

+244
-1
lines changed

clang/include/clang/CIR/Dialect/IR/CIROps.td

+43
Original file line numberDiff line numberDiff line change
@@ -5411,6 +5411,49 @@ def AtomicCmpXchg : CIR_Op<"atomic.cmp_xchg",
54115411
let hasVerifier = 0;
54125412
}
54135413

5414+
// Synchronization scopes that can be attached to `cir.atomic.fence`.
// The assembly spellings are `single_thread` and `system`.
def MemScope_SingleThread
    : I32EnumAttrCase<"MemScope_SingleThread", 0, "single_thread">;
def MemScope_System : I32EnumAttrCase<"MemScope_System", 1, "system">;

// 32-bit enum attribute grouping the scopes above; the generated C++ enum
// lives in the `::cir` namespace.
def MemScopeKind : I32EnumAttr<"MemScopeKind", "Memory Scope Enumeration",
                               [MemScope_SingleThread, MemScope_System]> {
  let cppNamespace = "::cir";
}
5425+
5426+
// Fence operation: takes a synchronization scope and a memory ordering as
// attributes and produces no results.
def AtomicFence : CIR_Op<"atomic.fence"> {
  let summary = "Atomic thread fence";
  let description = [{
    C/C++ atomic fence synchronization primitive. Enforces memory ordering
    constraints within the specified synchronization scope.

    The operation is meant to model all of the fence builtins:
      - `__atomic_thread_fence`
      - `__atomic_signal_fence`
      - `__c11_atomic_thread_fence`
      - `__c11_atomic_signal_fence`

    Thread fences use the `system` scope and signal fences use the
    `single_thread` scope. CIRGen currently emits this operation only for the
    `__atomic_*` builtins; the `__c11_atomic_*` builtins are still NYI.

    Example:
    ```mlir
    cir.atomic.fence system seq_cst
    cir.atomic.fence single_thread seq_cst
    ```
  }];
  let results = (outs);
  let arguments = (ins Arg<MemScopeKind, "sync scope">:$sync_scope,
                       Arg<MemOrder, "memory order">:$ordering);

  let assemblyFormat = [{
    $sync_scope $ordering attr-dict
  }];

  let hasVerifier = 0;
}
5456+
54145457
def SignBitOp : CIR_Op<"signbit", [Pure]> {
54155458
let summary = "Checks the sign of a floating-point number";
54165459
let description = [{

clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp

+35-1
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,11 @@
1616
#include "CIRGenCstEmitter.h"
1717
#include "CIRGenFunction.h"
1818
#include "CIRGenModule.h"
19+
#include "CIRGenValue.h"
1920
#include "TargetInfo.h"
21+
#include "clang/AST/Expr.h"
22+
#include "clang/CIR/Dialect/IR/CIRAttrs.h"
23+
#include "clang/CIR/Dialect/IR/CIROpsEnums.h"
2024
#include "clang/CIR/MissingFeatures.h"
2125

2226
// TODO(cir): we shouldn't need this but we currently reuse intrinsic IDs for
@@ -30,7 +34,9 @@
3034
#include "clang/Frontend/FrontendDiagnostic.h"
3135

3236
#include "mlir/Dialect/Func/IR/FuncOps.h"
37+
#include "mlir/IR/BuiltinAttributes.h"
3338
#include "mlir/IR/Value.h"
39+
#include "mlir/Support/LLVM.h"
3440
#include "clang/CIR/Dialect/IR/CIRDialect.h"
3541
#include "llvm/Support/ErrorHandling.h"
3642

@@ -333,6 +339,30 @@ static mlir::Value MakeAtomicCmpXchgValue(CIRGenFunction &cgf,
333339
return returnBool ? op.getResult(1) : op.getResult(0);
334340
}
335341

342+
/// Emit a `cir.atomic.fence` for a fence builtin call.
///
/// \param cgf       Function-level codegen state that provides the builder.
/// \param expr      The builtin call; argument 0 is the memory ordering.
/// \param syncScope Synchronization scope to attach to the fence.
/// \returns A null `mlir::Value`; no result value is produced for the call.
///
/// Only orderings that are emitted as an integer `cir.const` are supported;
/// everything else is reported as NYI.
static mlir::Value makeAtomicFenceValue(CIRGenFunction &cgf,
                                        const CallExpr *expr,
                                        cir::MemScopeKind syncScope) {
  auto &builder = cgf.getBuilder();
  mlir::Value orderingVal = cgf.emitScalarExpr(expr->getArg(0));

  // Value::getDefiningOp<OpTy>() tolerates values without a defining
  // operation (e.g. block arguments); a plain dyn_cast on the null
  // Operation* would assert instead of reaching the NYI report below.
  auto constOrdering = orderingVal.getDefiningOp<cir::ConstantOp>();
  if (!constOrdering)
    llvm_unreachable("NYI: variable ordering not supported");

  // Fail loudly rather than silently emitting no fence when the constant is
  // not an integer attribute.
  auto constOrderingAttr =
      mlir::dyn_cast<cir::IntAttr>(constOrdering.getValue());
  if (!constOrderingAttr)
    llvm_unreachable("NYI: non-integer ordering constant not supported");

  // NOTE(review): the integer is cast to cir::MemOrder without a range
  // check, so this presumably relies on the builtin's ordering numbering
  // matching the enum -- confirm, and consider rejecting invalid values.
  auto ordering = static_cast<cir::MemOrder>(constOrderingAttr.getUInt());
  builder.create<cir::AtomicFence>(cgf.getLoc(expr->getSourceRange()),
                                   syncScope, ordering);

  return mlir::Value();
}
365+
336366
static bool
337367
typeRequiresBuiltinLaunderImp(const ASTContext &astContext, QualType ty,
338368
llvm::SmallPtrSetImpl<const Decl *> &seen) {
@@ -1863,10 +1893,14 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
18631893
llvm_unreachable("BI__atomic_clear NYI");
18641894

18651895
case Builtin::BI__atomic_thread_fence:
1896+
return RValue::get(
1897+
makeAtomicFenceValue(*this, E, cir::MemScopeKind::MemScope_System));
18661898
case Builtin::BI__atomic_signal_fence:
1899+
return RValue::get(makeAtomicFenceValue(
1900+
*this, E, cir::MemScopeKind::MemScope_SingleThread));
18671901
case Builtin::BI__c11_atomic_thread_fence:
18681902
case Builtin::BI__c11_atomic_signal_fence:
1869-
llvm_unreachable("BI__atomic_thread_fence like NYI");
1903+
llvm_unreachable("BI__c11_atomic_thread_fence like NYI");
18701904

18711905
case Builtin::BI__builtin_signbit:
18721906
case Builtin::BI__builtin_signbitf:

clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp

+22
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@
1818
#include "mlir/Conversion/SCFToControlFlow/SCFToControlFlow.h"
1919
#include "mlir/Dialect/DLTI/DLTI.h"
2020
#include "mlir/Dialect/Func/IR/FuncOps.h"
21+
#include "mlir/Dialect/LLVMIR/LLVMAttrs.h"
22+
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
2123
#include "mlir/Dialect/LLVMIR/Transforms/Passes.h"
2224
#include "mlir/IR/Attributes.h"
2325
#include "mlir/IR/Builders.h"
@@ -41,6 +43,8 @@
4143
#include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h"
4244
#include "mlir/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.h"
4345
#include "mlir/Target/LLVMIR/Export.h"
46+
#include "clang/CIR/Dialect/IR/CIRDialect.h"
47+
#include "clang/CIR/Dialect/IR/CIROpsEnums.h"
4448
#include "clang/CIR/Dialect/Passes.h"
4549
#include "clang/CIR/LoweringHelpers.h"
4650
#include "clang/CIR/MissingFeatures.h"
@@ -3176,6 +3180,11 @@ mlir::LLVM::AtomicOrdering getLLVMAtomicOrder(cir::MemOrder memo) {
31763180
llvm_unreachable("shouldn't get here");
31773181
}
31783182

3183+
/// Translate a CIR memory scope into the sync-scope name passed to the LLVM
/// dialect: "singlethread" for the single-thread scope, and the empty name
/// for every other scope.
llvm::StringRef getLLVMSyncScope(cir::MemScopeKind syncScope) {
  if (syncScope == cir::MemScopeKind::MemScope_SingleThread)
    return "singlethread";
  return "";
}
3187+
31793188
mlir::LogicalResult CIRToLLVMAtomicCmpXchgLowering::matchAndRewrite(
31803189
cir::AtomicCmpXchg op, OpAdaptor adaptor,
31813190
mlir::ConversionPatternRewriter &rewriter) const {
@@ -3344,6 +3353,18 @@ mlir::LogicalResult CIRToLLVMAtomicFetchLowering::matchAndRewrite(
33443353
return mlir::success();
33453354
}
33463355

3356+
/// Replace a `cir::AtomicFence` with an `mlir::LLVM::FenceOp` that carries the
/// translated ordering and sync-scope name. Always reports success.
mlir::LogicalResult CIRToLLVMAtomicFenceLowering::matchAndRewrite(
    cir::AtomicFence op, OpAdaptor adaptor,
    mlir::ConversionPatternRewriter &rewriter) const {
  const mlir::LLVM::AtomicOrdering fenceOrdering =
      getLLVMAtomicOrder(adaptor.getOrdering());
  const llvm::StringRef fenceScope = getLLVMSyncScope(adaptor.getSyncScope());

  rewriter.replaceOpWithNewOp<mlir::LLVM::FenceOp>(op, fenceOrdering,
                                                   fenceScope);
  return mlir::success();
}
3367+
33473368
mlir::LogicalResult CIRToLLVMByteswapOpLowering::matchAndRewrite(
33483369
cir::ByteswapOp op, OpAdaptor adaptor,
33493370
mlir::ConversionPatternRewriter &rewriter) const {
@@ -4105,6 +4126,7 @@ void populateCIRToLLVMConversionPatterns(
41054126
CIRToLLVMAtomicCmpXchgLowering,
41064127
CIRToLLVMAtomicFetchLowering,
41074128
CIRToLLVMAtomicXchgLowering,
4129+
CIRToLLVMAtomicFenceLowering,
41084130
CIRToLLVMBaseClassAddrOpLowering,
41094131
CIRToLLVMBinOpLowering,
41104132
CIRToLLVMBinOpOverflowOpLowering,

clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h

+11
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include "mlir/IR/MLIRContext.h"
1717
#include "mlir/Interfaces/DataLayoutInterfaces.h"
1818
#include "mlir/Transforms/DialectConversion.h"
19+
#include "clang/CIR/Dialect/IR/CIRDialect.h"
1920

2021
namespace cir {
2122
namespace direct {
@@ -822,6 +823,16 @@ class CIRToLLVMAtomicFetchLowering
822823
mlir::ConversionPatternRewriter &) const override;
823824
};
824825

826+
class CIRToLLVMAtomicFenceLowering
827+
: public mlir::OpConversionPattern<cir::AtomicFence> {
828+
public:
829+
using mlir::OpConversionPattern<cir::AtomicFence>::OpConversionPattern;
830+
831+
mlir::LogicalResult
832+
matchAndRewrite(cir::AtomicFence op, OpAdaptor,
833+
mlir::ConversionPatternRewriter &) const override;
834+
};
835+
825836
class CIRToLLVMByteswapOpLowering
826837
: public mlir::OpConversionPattern<cir::ByteswapOp> {
827838
public:
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-cir %s -o %t.cir
2+
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
3+
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
4+
// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
5+
6+
7+
// Aggregate used by the fence tests below: an int member (field index 0 in
// the checks) and a pointer member (field index 1), accessed through DataPtr.
struct Data {
8+
int value;
9+
void *ptr;
10+
};
11+
12+
typedef struct Data *DataPtr;
13+
14+
// Thread fence on its own; the checks that follow expect a system-scope
// seq_cst fence in the CIR output and a plain seq_cst fence in the LLVM IR.
void applyThreadFence() {
15+
__atomic_thread_fence(__ATOMIC_SEQ_CST);
16+
}
17+
18+
// CIR-LABEL: @applyThreadFence
19+
// CIR: cir.atomic.fence system seq_cst
20+
// CIR: cir.return
21+
22+
// LLVM-LABEL: @applyThreadFence
23+
// LLVM: fence seq_cst
24+
// LLVM: ret void
25+
26+
// Signal fence on its own; the checks that follow expect a single_thread
// fence in the CIR output and a "singlethread" syncscope fence in the LLVM IR.
void applySignalFence() {
27+
__atomic_signal_fence(__ATOMIC_SEQ_CST);
28+
}
29+
// CIR-LABEL: @applySignalFence
30+
// CIR: cir.atomic.fence single_thread seq_cst
31+
// CIR: cir.return
32+
33+
// LLVM-LABEL: @applySignalFence
34+
// LLVM: fence syncscope("singlethread") seq_cst
35+
// LLVM: ret void
36+
37+
// Thread fence followed by a plain store; the checks that follow expect the
// fence to appear before the store of 42 to d->value.
void modifyWithThreadFence(DataPtr d) {
38+
__atomic_thread_fence(__ATOMIC_SEQ_CST);
39+
d->value = 42;
40+
}
41+
// CIR-LABEL: @modifyWithThreadFence
42+
// CIR: %[[DATA:.*]] = cir.alloca !cir.ptr<!ty_Data>, !cir.ptr<!cir.ptr<!ty_Data>>, ["d", init] {alignment = 8 : i64}
43+
// CIR: cir.atomic.fence system seq_cst
44+
// CIR: %[[VAL_42:.*]] = cir.const #cir.int<42> : !s32i
45+
// CIR: %[[LOAD_DATA:.*]] = cir.load %[[DATA]] : !cir.ptr<!cir.ptr<!ty_Data>>, !cir.ptr<!ty_Data>
46+
// CIR: %[[DATA_VALUE:.*]] = cir.get_member %[[LOAD_DATA]][0] {name = "value"} : !cir.ptr<!ty_Data> -> !cir.ptr<!s32i>
47+
// CIR: cir.store %[[VAL_42]], %[[DATA_VALUE]] : !s32i, !cir.ptr<!s32i>
48+
// CIR: cir.return
49+
50+
// LLVM-LABEL: @modifyWithThreadFence
51+
// LLVM: %[[DATA:.*]] = alloca ptr, i64 1, align 8
52+
// LLVM: fence seq_cst
53+
// LLVM: %[[DATA_PTR:.*]] = load ptr, ptr %[[DATA]], align 8
54+
// LLVM: %[[DATA_VALUE:.*]] = getelementptr %struct.Data, ptr %[[DATA_PTR]], i32 0, i32 0
55+
// LLVM: store i32 42, ptr %[[DATA_VALUE]], align 4
56+
// LLVM: ret void
57+
58+
// Signal fence followed by a plain store; the checks that follow expect the
// single-thread fence to appear before the store of 24 to d->value.
void modifyWithSignalFence(DataPtr d) {
59+
__atomic_signal_fence(__ATOMIC_SEQ_CST);
60+
d->value = 24;
61+
}
62+
// CIR-LABEL: @modifyWithSignalFence
63+
// CIR: %[[DATA:.*]] = cir.alloca !cir.ptr<!ty_Data>, !cir.ptr<!cir.ptr<!ty_Data>>, ["d", init] {alignment = 8 : i64}
64+
// CIR: cir.atomic.fence single_thread seq_cst
65+
// CIR: %[[VAL_24:.*]] = cir.const #cir.int<24> : !s32i
66+
// CIR: %[[LOAD_DATA:.*]] = cir.load %[[DATA]] : !cir.ptr<!cir.ptr<!ty_Data>>, !cir.ptr<!ty_Data>
67+
// CIR: %[[DATA_VALUE:.*]] = cir.get_member %[[LOAD_DATA]][0] {name = "value"} : !cir.ptr<!ty_Data> -> !cir.ptr<!s32i>
68+
// CIR: cir.store %[[VAL_24]], %[[DATA_VALUE]] : !s32i, !cir.ptr<!s32i>
69+
// CIR: cir.return
70+
71+
// LLVM-LABEL: @modifyWithSignalFence
72+
// LLVM: %[[DATA:.*]] = alloca ptr, i64 1, align 8
73+
// LLVM: fence syncscope("singlethread") seq_cst
74+
// LLVM: %[[DATA_PTR:.*]] = load ptr, ptr %[[DATA]], align 8
75+
// LLVM: %[[DATA_VALUE:.*]] = getelementptr %struct.Data, ptr %[[DATA_PTR]], i32 0, i32 0
76+
// LLVM: store i32 24, ptr %[[DATA_VALUE]], align 4
77+
// LLVM: ret void
78+
79+
// Thread fence followed by a seq_cst atomic load of d->ptr; the checks that
// follow expect the fence to appear before the atomic load.
void loadWithThreadFence(DataPtr d) {
80+
__atomic_thread_fence(__ATOMIC_SEQ_CST);
81+
__atomic_load_n(&d->ptr, __ATOMIC_SEQ_CST);
82+
}
83+
// CIR-LABEL: @loadWithThreadFence
84+
// CIR: %[[DATA:.*]] = cir.alloca !cir.ptr<!ty_Data>, !cir.ptr<!cir.ptr<!ty_Data>>, ["d", init] {alignment = 8 : i64}
85+
// CIR: %[[ATOMIC_TEMP:.*]] = cir.alloca !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>, ["atomic-temp"] {alignment = 8 : i64}
86+
// CIR: cir.atomic.fence system seq_cst
87+
// CIR: %[[LOAD_DATA:.*]] = cir.load %[[DATA]] : !cir.ptr<!cir.ptr<!ty_Data>>, !cir.ptr<!ty_Data>
88+
// CIR: %[[DATA_VALUE:.*]] = cir.get_member %[[LOAD_DATA]][1] {name = "ptr"} : !cir.ptr<!ty_Data> -> !cir.ptr<!cir.ptr<!void>>
89+
// CIR: %[[CASTED_DATA_VALUE:.*]] = cir.cast(bitcast, %[[DATA_VALUE]] : !cir.ptr<!cir.ptr<!void>>), !cir.ptr<!u64i>
90+
// CIR: %[[ATOMIC_LOAD:.*]] = cir.load atomic(seq_cst) %[[CASTED_DATA_VALUE]] : !cir.ptr<!u64i>, !u64i
91+
// CIR: %[[CASTED_ATOMIC_TEMP:.*]] = cir.cast(bitcast, %[[ATOMIC_TEMP]] : !cir.ptr<!cir.ptr<!void>>), !cir.ptr<!u64i>
92+
// CIR: cir.store %[[ATOMIC_LOAD]], %[[CASTED_ATOMIC_TEMP]] : !u64i, !cir.ptr<!u64i>
93+
// CIR: %[[ATOMIC_LOAD_PTR:.*]] = cir.load %[[ATOMIC_TEMP]] : !cir.ptr<!cir.ptr<!void>>, !cir.ptr<!void>
94+
// CIR: cir.return
95+
96+
// LLVM-LABEL: @loadWithThreadFence
97+
// LLVM: %[[DATA:.*]] = alloca ptr, i64 1, align 8
98+
// LLVM: %[[DATA_TEMP:.*]] = alloca ptr, i64 1, align 8
99+
// LLVM: fence seq_cst
100+
// LLVM: %[[DATA_PTR:.*]] = load ptr, ptr %[[DATA]], align 8
101+
// LLVM: %[[DATA_VALUE:.*]] = getelementptr %struct.Data, ptr %[[DATA_PTR]], i32 0, i32 1
102+
// LLVM: %[[ATOMIC_LOAD:.*]] = load atomic i64, ptr %[[DATA_VALUE]] seq_cst, align 8
103+
// LLVM: store i64 %[[ATOMIC_LOAD]], ptr %[[DATA_TEMP]], align 8
104+
// LLVM: %[[DATA_TEMP_LOAD:.*]] = load ptr, ptr %[[DATA_TEMP]], align 8
105+
// LLVM: ret void
106+
107+
// Signal fence followed by a seq_cst atomic load of d->ptr; the checks that
// follow expect the single-thread fence to appear before the atomic load.
void loadWithSignalFence(DataPtr d) {
108+
__atomic_signal_fence(__ATOMIC_SEQ_CST);
109+
__atomic_load_n(&d->ptr, __ATOMIC_SEQ_CST);
110+
}
111+
// CIR-LABEL: @loadWithSignalFence
112+
// CIR: %[[DATA:.*]] = cir.alloca !cir.ptr<!ty_Data>, !cir.ptr<!cir.ptr<!ty_Data>>, ["d", init] {alignment = 8 : i64}
113+
// CIR: %[[ATOMIC_TEMP:.*]] = cir.alloca !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>, ["atomic-temp"] {alignment = 8 : i64}
114+
// CIR: cir.atomic.fence single_thread seq_cst
115+
// CIR: %[[LOAD_DATA:.*]] = cir.load %[[DATA]] : !cir.ptr<!cir.ptr<!ty_Data>>, !cir.ptr<!ty_Data>
116+
// CIR: %[[DATA_PTR:.*]] = cir.get_member %[[LOAD_DATA]][1] {name = "ptr"} : !cir.ptr<!ty_Data> -> !cir.ptr<!cir.ptr<!void>>
117+
// CIR: %[[CASTED_DATA_PTR:.*]] = cir.cast(bitcast, %[[DATA_PTR]] : !cir.ptr<!cir.ptr<!void>>), !cir.ptr<!u64i>
118+
// CIR: %[[ATOMIC_LOAD:.*]] = cir.load atomic(seq_cst) %[[CASTED_DATA_PTR]] : !cir.ptr<!u64i>, !u64i
119+
// CIR: %[[CASTED_ATOMIC_TEMP:.*]] = cir.cast(bitcast, %[[ATOMIC_TEMP]] : !cir.ptr<!cir.ptr<!void>>), !cir.ptr<!u64i>
120+
// CIR: cir.store %[[ATOMIC_LOAD]], %[[CASTED_ATOMIC_TEMP]] : !u64i, !cir.ptr<!u64i>
121+
// CIR: %[[LOAD_ATOMIC_TEMP:.*]] = cir.load %[[ATOMIC_TEMP]] : !cir.ptr<!cir.ptr<!void>>, !cir.ptr<!void>
122+
// CIR: cir.return
123+
124+
// LLVM-LABEL: @loadWithSignalFence
125+
// LLVM: %[[DATA:.*]] = alloca ptr, i64 1, align 8
126+
// LLVM: %[[DATA_TEMP:.*]] = alloca ptr, i64 1, align 8
127+
// LLVM: fence syncscope("singlethread") seq_cst
128+
// LLVM: %[[DATA_PTR:.*]] = load ptr, ptr %[[DATA]], align 8
129+
// LLVM: %[[DATA_VALUE:.*]] = getelementptr %struct.Data, ptr %[[DATA_PTR]], i32 0, i32 1
130+
// LLVM: %[[ATOMIC_LOAD:.*]] = load atomic i64, ptr %[[DATA_VALUE]] seq_cst, align 8
131+
// LLVM: store i64 %[[ATOMIC_LOAD]], ptr %[[DATA_TEMP]], align 8
132+
// LLVM: %[[DATA_TEMP_LOAD:.*]] = load ptr, ptr %[[DATA_TEMP]], align 8
133+
// LLVM: ret void

0 commit comments

Comments
 (0)