Skip to content

Commit 2c39ad9

Browse files
committed
[CIR][CIRGen][TBAA] Add support for pointer tbaa
1 parent 52fd4a2 commit 2c39ad9

File tree

9 files changed

+243
-35
lines changed

9 files changed

+243
-35
lines changed

clang/include/clang/CIR/Dialect/IR/CIRTBAAAttrs.td

+17-1
Original file line numberDiff line numberDiff line change
@@ -22,22 +22,38 @@ def CIR_TBAAOmnipotentChar
2222
def CIR_TBAAScalarAttr : CIR_Attr<"TBAAScalar", "tbaa_scalar", [], "TBAAAttr"> {
2323
let summary = "Describes a scalar type in TBAA with an identifier.";
2424

25-
let parameters = (ins StringRefParameter<> : $id, CIR_AnyType : $type);
25+
let parameters = (ins StringRefParameter<>:$id,
26+
CIR_AnyType:$type,
27+
OptionalParameter<"cir::TBAAScalarAttr">:$parent);
2628

2729
let description = [{
2830
Define a TBAA scalar attribute.
31+
The optional `parent` attribute is used to describe the parent type of the
32+
scalar type. If the `parent` is null or omitted, the parent type is the
33+
`omnipotent char` type.
2934

3035
Example:
3136
```mlir
3237
// CIR_TBAAScalarAttr
3338
#tbaa_scalar = #cir.tbaa_scalar<id = "int", type = !s32i>
3439
#tbaa_scalar1 = #cir.tbaa_scalar<id = "long long", type = !s64i>
40+
41+
#tbaa_scalar2 = #cir.tbaa_scalar<id = "any pointer", type = !cir.ptr<!s32i>>
42+
#tbaa_scalar3 = #cir.tbaa_scalar<id = "p1 int", type = !cir.ptr<!s32i>,
43+
parent = #tbaa_scalar2>
3544
```
3645

3746
See the following link for more details:
3847
https://llvm.org/docs/LangRef.html#tbaa-metadata
3948
}];
4049

50+
let builders = [
51+
AttrBuilder<(ins "llvm::StringRef":$id,
52+
"mlir::Type":$type), [{
53+
return $_get($_ctxt, id, type, /*parent =*/ nullptr);
54+
}]>
55+
];
56+
4157
let assemblyFormat = "`<` struct(params) `>`";
4258
}
4359

clang/include/clang/CIR/MissingFeatures.h

-1
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,6 @@ struct MissingFeatures {
6565
static bool tbaaMergeTBAAInfo() { return false; }
6666
static bool tbaaMayAlias() { return false; }
6767
static bool tbaaNewStructPath() { return false; }
68-
static bool tbaaPointer() { return false; }
6968
static bool emitNullabilityCheck() { return false; }
7069
static bool ptrAuth() { return false; }
7170
static bool memberFuncPtrAuthInfo() { return false; }

clang/lib/CIR/CodeGen/CIRGenTBAA.cpp

+72-6
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ static bool isValidBaseType(clang::QualType qty) {
6565
return false;
6666
}
6767

68-
cir::TBAAAttr CIRGenTBAA::getScalarTypeInfo(clang::QualType qty) {
68+
cir::TBAAScalarAttr CIRGenTBAA::getScalarTypeInfo(clang::QualType qty) {
6969
const clang::Type *ty = astContext.getCanonicalType(qty).getTypePtr();
7070
assert(mlir::isa<clang::BuiltinType>(ty));
7171
const clang::BuiltinType *bty = mlir::dyn_cast<BuiltinType>(ty);
@@ -159,12 +159,78 @@ cir::TBAAAttr CIRGenTBAA::getTypeInfoHelper(clang::QualType qty) {
159159
// they involve a significant representation difference. We don't
160160
// currently do so, however.
161161
if (ty->isPointerType() || ty->isReferenceType()) {
162-
if (!codeGenOpts.PointerTBAA) {
163-
return cir::TBAAScalarAttr::get(mlirContext, "any pointer",
164-
types.convertType(qty));
162+
auto anyPtr = cir::TBAAScalarAttr::get(mlirContext, "any pointer",
163+
types.convertType(qty));
164+
if (!codeGenOpts.PointerTBAA)
165+
return anyPtr;
166+
// C++ [basic.lval]p11 permits objects to be accessed through an l-value of
167+
// similar type. Two types are similar under C++ [conv.qual]p2 if the
168+
// decomposition of the types into pointers, member pointers, and arrays has
169+
// the same structure when ignoring cv-qualifiers at each level of the
170+
// decomposition. Meanwhile, C makes T(*)[] and T(*)[N] compatible, which
171+
// would really complicate any attempt to distinguish pointers to arrays by
172+
// their bounds. It's simpler, and much easier to explain to users, to
173+
// simply treat all pointers to arrays as pointers to their element type for
174+
// aliasing purposes. So when creating a TBAA tag for a pointer type, we
175+
// recursively ignore both qualifiers and array types when decomposing the
176+
// pointee type. The only meaningful remaining structure is the number of
177+
// pointer types we encountered along the way, so we just produce the tag
178+
// "p<depth> <base type tag>". If we do find a member pointer type, for now
179+
// we just conservatively bail out with AnyPtr (below) rather than trying to
180+
// create a tag that honors the similar-type rules while still
181+
// distinguishing different kinds of member pointer.
182+
unsigned ptrDepth = 0;
183+
do {
184+
ptrDepth++;
185+
ty = ty->getPointeeType()->getBaseElementTypeUnsafe();
186+
} while (ty->isPointerType());
187+
assert(!isa<VariableArrayType>(ty));
188+
// When the underlying type is a builtin type, we compute the pointee type
189+
// string recursively, which is implicitly more forgiving than the standards
190+
// require. Effectively, we are turning the question "are these types
191+
// compatible/similar" into "are accesses to these types allowed to alias".
192+
// In both C and C++, the latter question has special carve-outs for
193+
// signedness mismatches that only apply at the top level. As a result, we
194+
// are allowing e.g. `int *` l-values to access `unsigned *` objects.
195+
SmallString<256> tyName;
196+
197+
if (isa<BuiltinType>(ty)) {
198+
auto scalarAttr = getScalarTypeInfo(ty->getCanonicalTypeInternal());
199+
tyName = scalarAttr.getId();
200+
} else {
201+
// Be conservative if the type isn't a RecordType. We are specifically
202+
// required to do this for member pointers until we implement the
203+
// similar-types rule.
204+
const auto *rt = ty->getAs<RecordType>();
205+
if (!rt)
206+
return anyPtr;
207+
208+
// For unnamed structs or unions C's compatible types rule applies. Two
209+
// compatible types in different compilation units can have different
210+
// mangled names, meaning the metadata emitted below would incorrectly
211+
// mark them as no-alias. Use AnyPtr for such types in both C and C++, as
212+
// C and C++ types may be visible when doing LTO.
213+
//
214+
// Note that using AnyPtr is overly conservative. We could summarize the
215+
// members of the type, as per the C compatibility rule in the future.
216+
// This also covers anonymous structs and unions, which have a different
217+
// compatibility rule, but it doesn't matter because you can never have a
218+
// pointer to an anonymous struct or union.
219+
if (!rt->getDecl()->getDeclName())
220+
return anyPtr;
221+
222+
// For non-builtin types use the mangled name of the canonical type.
223+
llvm::raw_svector_ostream tyOut(tyName);
224+
types.getCXXABI().getMangleContext().mangleCanonicalTypeName(
225+
QualType(ty, 0), tyOut);
165226
}
166-
assert(!cir::MissingFeatures::tbaaPointer());
167-
return tbaa_NYI(mlirContext);
227+
228+
SmallString<256> outName("p");
229+
outName += std::to_string(ptrDepth);
230+
outName += " ";
231+
outName += tyName;
232+
return cir::TBAAScalarAttr::get(mlirContext, outName,
233+
types.convertType(qty), anyPtr);
168234
}
169235
// Accesses to arrays are accesses to objects of their element types.
170236
if (codeGenOpts.NewStructPathTBAA && ty->isArrayType()) {

clang/lib/CIR/CodeGen/CIRGenTBAA.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ class CIRGenTBAA {
112112
// An internal helper function to generate metadata used
113113
// to describe accesses to objects of the given type.
114114
cir::TBAAAttr getTypeInfoHelper(clang::QualType qty);
115-
cir::TBAAAttr getScalarTypeInfo(clang::QualType qty);
115+
cir::TBAAScalarAttr getScalarTypeInfo(clang::QualType qty);
116116

117117
cir::TBAAAttr getValidBaseTypeInfo(clang::QualType qty);
118118
cir::TBAAAttr getBaseTypeInfoHelper(const clang::Type *ty);

clang/lib/CIR/Lowering/DirectToLLVM/LowerTBAAToLLVM.cpp

+5-1
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,11 @@ class CIRToLLVMTBAAAttrLowering {
2525
if (auto scalarAttr = mlir::dyn_cast<cir::TBAAScalarAttr>(tbaa)) {
2626
mlir::DataLayout layout;
2727
auto size = layout.getTypeSize(scalarAttr.getType());
28-
return createScalarTypeNode(scalarAttr.getId(), getChar(), size);
28+
mlir::LLVM::TBAANodeAttr parent =
29+
scalarAttr.getParent()
30+
? lowerCIRTBAAAttrToLLVMTBAAAttr(scalarAttr.getParent())
31+
: getChar();
32+
return createScalarTypeNode(scalarAttr.getId(), parent, size);
2933
}
3034
if (auto structAttr = mlir::dyn_cast<cir::TBAAStructAttr>(tbaa)) {
3135
llvm::SmallVector<mlir::LLVM::TBAAMemberAttr, 4> members;

clang/test/CIR/CodeGen/tbaa-enum.c

+7-8
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,15 @@
11
// This is inspired by clang/test/CodeGen/tbaa.c, with both CIR and LLVM checks.
2-
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir -O1
2+
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir -O1 -no-pointer-tbaa
33
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
4-
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll -O1 -disable-llvm-passes
4+
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll -O1 -disable-llvm-passes -no-pointer-tbaa
55
// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
6-
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll -O1 -disable-llvm-passes -relaxed-aliasing
6+
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll -O1 -disable-llvm-passes -relaxed-aliasing -no-pointer-tbaa
77
// RUN: FileCheck --check-prefix=NO-TBAA --input-file=%t.ll %s
8-
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll -O0 -disable-llvm-passes
8+
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll -O0 -disable-llvm-passes -no-pointer-tbaa
99
// RUN: FileCheck --check-prefix=NO-TBAA --input-file=%t.ll %s
1010

1111
// NO-TBAA-NOT: !tbaa
1212

13-
// CIR: #tbaa[[NYI:.*]] = #cir.tbaa
1413
// CIR: #tbaa[[CHAR:.*]] = #cir.tbaa_omnipotent_char
1514
// CIR: #tbaa[[INT:.*]] = #cir.tbaa_scalar<id = "int", type = !s32i>
1615
// CIR: #tbaa[[LONG_LONG:.*]] = #cir.tbaa_scalar<id = "long long", type = !s64i>
@@ -136,10 +135,10 @@ uint8_t g3(Enum8 *E, uint8_t *val) {
136135
return *val;
137136
}
138137

139-
// LLVM: [[TAG_i32]] = !{[[TYPE_i32:!.*]], [[TYPE_i32]], i64 0}
140-
// LLVM: [[TYPE_i32]] = !{!"int", [[TYPE_char:!.*]],
141-
// LLVM: [[TYPE_char]] = !{!"omnipotent char", [[TAG_c_tbaa:!.*]],
138+
// LLVM: [[TYPE_char:!.*]] = !{!"omnipotent char", [[TAG_c_tbaa:!.*]],
142139
// LLVM: [[TAG_c_tbaa]] = !{!"Simple C/C++ TBAA"}
140+
// LLVM: [[TAG_i32]] = !{[[TYPE_i32:!.*]], [[TYPE_i32]], i64 0}
141+
// LLVM: [[TYPE_i32]] = !{!"int", [[TYPE_char]],
143142
// LLVM: [[TAG_i64]] = !{[[TYPE_i64:!.*]], [[TYPE_i64]], i64 0}
144143
// LLVM: [[TYPE_i64]] = !{!"long long", [[TYPE_char]],
145144
// LLVM: [[TAG_long]] = !{[[TYPE_long:!.*]], [[TYPE_long]], i64 0}

clang/test/CIR/CodeGen/tbaa-enum.cpp

+7-8
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,15 @@
11
// This is inspired by clang/test/CodeGen/tbaa.c, with both CIR and LLVM checks.
2-
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir -O1
2+
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir -O1 -no-pointer-tbaa
33
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
4-
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll -O1 -disable-llvm-passes
4+
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll -O1 -disable-llvm-passes -no-pointer-tbaa
55
// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
6-
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll -O1 -disable-llvm-passes -relaxed-aliasing
6+
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll -O1 -disable-llvm-passes -relaxed-aliasing -no-pointer-tbaa
77
// RUN: FileCheck --check-prefix=NO-TBAA --input-file=%t.ll %s
8-
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll -O0 -disable-llvm-passes
8+
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll -O0 -disable-llvm-passes -no-pointer-tbaa
99
// RUN: FileCheck --check-prefix=NO-TBAA --input-file=%t.ll %s
1010

1111
// NO-TBAA-NOT: !tbaa
1212

13-
// CIR: #tbaa[[NYI:.*]] = #cir.tbaa
1413
// CIR: #tbaa[[CHAR:.*]] = #cir.tbaa_omnipotent_char
1514
// CIR: #tbaa[[INT:.*]] = #cir.tbaa_scalar<id = "int", type = !s32i>
1615
// CIR: #tbaa[[EnumAuto32:.*]] = #cir.tbaa_scalar<id = "_ZTS10EnumAuto32", type = !u32i>
@@ -139,10 +138,10 @@ uint8_t g3(Enum8 *E, uint8_t *val) {
139138
return *val;
140139
}
141140

142-
// LLVM: [[TAG_i32]] = !{[[TYPE_i32:!.*]], [[TYPE_i32]], i64 0}
143-
// LLVM: [[TYPE_i32]] = !{!"int", [[TYPE_char:!.*]],
144-
// LLVM: [[TYPE_char]] = !{!"omnipotent char", [[TAG_c_tbaa:!.*]],
141+
// LLVM: [[TYPE_char:!.*]] = !{!"omnipotent char", [[TAG_c_tbaa:!.*]],
145142
// LLVM: [[TAG_c_tbaa]] = !{!"Simple C++ TBAA"}
143+
// LLVM: [[TAG_i32]] = !{[[TYPE_i32:!.*]], [[TYPE_i32]], i64 0}
144+
// LLVM: [[TYPE_i32]] = !{!"int", [[TYPE_char]],
146145
// LLVM: [[TAG_EnumAuto32]] = !{[[TYPE_EnumAuto32:!.*]], [[TYPE_EnumAuto32]], i64 0}
147146
// LLVM: [[TYPE_EnumAuto32]] = !{!"_ZTS10EnumAuto32", [[TYPE_char]],
148147
// LLVM: [[TAG_i64]] = !{[[TYPE_i64:!.*]], [[TYPE_i64]], i64 0}

0 commit comments

Comments
 (0)