Skip to content

Commit 981f07a

Browse files
gitoleglanza
authored andcommitted
[CIR][CodeGen] handle zero init padding case (#1257)
I continue to use `csmith` and catch run time bags. Now it's time to fix the layout for the const structs. There is a divergence between const structs generated by CIR and the original codegen. And this PR makes one more step to eliminate it. There are cases where the extra padding is required - and here is a fix for some of them. I did not write extra tests, since the fixes in the existing already covers the code I added. The point is that now the layout for all of these structs in the LLVM IR with and without CIR is the same.
1 parent b0a4d33 commit 981f07a

File tree

5 files changed

+118
-13
lines changed

5 files changed

+118
-13
lines changed

clang/lib/CIR/CodeGen/CIRGenExprConst.cpp

+60-6
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,19 @@ using namespace clang::CIRGen;
4343
namespace {
4444
class ConstExprEmitter;
4545

46+
static mlir::TypedAttr computePadding(CIRGenModule &CGM, CharUnits size) {
47+
auto eltTy = CGM.UCharTy;
48+
auto arSize = size.getQuantity();
49+
auto &bld = CGM.getBuilder();
50+
if (size > CharUnits::One()) {
51+
SmallVector<mlir::Attribute, 4> elts(arSize, bld.getZeroAttr(eltTy));
52+
return bld.getConstArray(mlir::ArrayAttr::get(bld.getContext(), elts),
53+
bld.getArrayType(eltTy, arSize));
54+
} else {
55+
return cir::ZeroAttr::get(bld.getContext(), eltTy);
56+
}
57+
}
58+
4659
static mlir::Attribute
4760
emitArrayConstant(CIRGenModule &CGM, mlir::Type DesiredType,
4861
mlir::Type CommonElementType, unsigned ArrayBound,
@@ -70,12 +83,7 @@ struct ConstantAggregateBuilderUtils {
7083
}
7184

7285
mlir::TypedAttr getPadding(CharUnits size) const {
73-
auto eltTy = CGM.UCharTy;
74-
auto arSize = size.getQuantity();
75-
auto &bld = CGM.getBuilder();
76-
SmallVector<mlir::Attribute, 4> elts(arSize, bld.getZeroAttr(eltTy));
77-
return bld.getConstArray(mlir::ArrayAttr::get(bld.getContext(), elts),
78-
bld.getArrayType(eltTy, arSize));
86+
return computePadding(CGM, size);
7987
}
8088

8189
mlir::Attribute getZeroes(CharUnits ZeroSize) const {
@@ -508,6 +516,11 @@ class ConstStructBuilder {
508516
bool Build(InitListExpr *ILE, bool AllowOverwrite);
509517
bool Build(const APValue &Val, const RecordDecl *RD, bool IsPrimaryBase,
510518
const CXXRecordDecl *VTableClass, CharUnits BaseOffset);
519+
520+
bool ApplyZeroInitPadding(const ASTRecordLayout &Layout, unsigned FieldNo,
521+
const FieldDecl &Field, bool AllowOverwrite,
522+
CharUnits &SizeSoFar, bool &ZeroFieldSize);
523+
511524
mlir::Attribute Finalize(QualType Ty);
512525
};
513526

@@ -614,6 +627,10 @@ bool ConstStructBuilder::Build(InitListExpr *ILE, bool AllowOverwrite) {
614627
if (CXXRD->getNumBases())
615628
return false;
616629

630+
const bool ZeroInitPadding = CGM.shouldZeroInitPadding();
631+
bool ZeroFieldSize = false;
632+
CharUnits SizeSoFar = CharUnits::Zero();
633+
617634
for (FieldDecl *Field : RD->fields()) {
618635
++FieldNo;
619636

@@ -642,6 +659,11 @@ bool ConstStructBuilder::Build(InitListExpr *ILE, bool AllowOverwrite) {
642659
continue;
643660
}
644661

662+
if (ZeroInitPadding &&
663+
!ApplyZeroInitPadding(Layout, FieldNo, *Field, AllowOverwrite,
664+
SizeSoFar, ZeroFieldSize))
665+
return false;
666+
645667
// When emitting a DesignatedInitUpdateExpr, a nested InitListExpr
646668
// represents additional overwriting of our current constant value, and not
647669
// a new constant to emit independently.
@@ -784,6 +806,38 @@ bool ConstStructBuilder::Build(const APValue &Val, const RecordDecl *RD,
784806
return true;
785807
}
786808

809+
bool ConstStructBuilder::ApplyZeroInitPadding(
810+
const ASTRecordLayout &Layout, unsigned FieldNo, const FieldDecl &Field,
811+
bool AllowOverwrite, CharUnits &SizeSoFar, bool &ZeroFieldSize) {
812+
813+
uint64_t StartBitOffset = Layout.getFieldOffset(FieldNo);
814+
CharUnits StartOffset =
815+
CGM.getASTContext().toCharUnitsFromBits(StartBitOffset);
816+
if (SizeSoFar < StartOffset) {
817+
if (!AppendBytes(SizeSoFar, computePadding(CGM, StartOffset - SizeSoFar),
818+
AllowOverwrite))
819+
return false;
820+
}
821+
822+
if (!Field.isBitField()) {
823+
CharUnits FieldSize =
824+
CGM.getASTContext().getTypeSizeInChars(Field.getType());
825+
SizeSoFar = StartOffset + FieldSize;
826+
ZeroFieldSize = FieldSize.isZero();
827+
} else {
828+
const CIRGenRecordLayout &RL =
829+
CGM.getTypes().getCIRGenRecordLayout(Field.getParent());
830+
const CIRGenBitFieldInfo &Info = RL.getBitFieldInfo(&Field);
831+
uint64_t EndBitOffset = StartBitOffset + Info.Size;
832+
SizeSoFar = CGM.getASTContext().toCharUnitsFromBits(EndBitOffset);
833+
if (EndBitOffset % CGM.getASTContext().getCharWidth() != 0) {
834+
SizeSoFar++;
835+
}
836+
ZeroFieldSize = Info.Size == 0;
837+
}
838+
return true;
839+
}
840+
787841
mlir::Attribute ConstStructBuilder::Finalize(QualType Type) {
788842
Type = Type.getNonReferenceType();
789843
RecordDecl *RD = Type->castAs<RecordType>()->getDecl();

clang/lib/CIR/CodeGen/CIRGenModule.h

+50
Original file line numberDiff line numberDiff line change
@@ -273,6 +273,56 @@ class CIRGenModule : public CIRGenTypeCache {
273273
return getTriple().isSPIRVLogical();
274274
}
275275

276+
bool shouldZeroInitPadding() const {
277+
// In C23 (N3096) $6.7.10:
278+
// """
279+
// If any object is initialized with an empty initializer, then it is
280+
// subject to default initialization:
281+
// - if it is an aggregate, every member is initialized (recursively)
282+
// according to these rules, and any padding is initialized to zero bits;
283+
// - if it is a union, the first named member is initialized (recursively)
284+
// according to these rules, and any padding is initialized to zero bits.
285+
//
286+
// If the aggregate or union contains elements or members that are
287+
// aggregates or unions, these rules apply recursively to the subaggregates
288+
// or contained unions.
289+
//
290+
// If there are fewer initializers in a brace-enclosed list than there are
291+
// elements or members of an aggregate, or fewer characters in a string
292+
// literal used to initialize an array of known size than there are elements
293+
// in the array, the remainder of the aggregate is subject to default
294+
// initialization.
295+
// """
296+
//
297+
// The standard seems ambiguous in the following two areas:
298+
// 1. For a union type with empty initializer, if the first named member is
299+
// not the largest member, then the bytes comes after the first named member
300+
// but before padding are left unspecified. An example is:
301+
// union U { int a; long long b;};
302+
// union U u = {}; // The first 4 bytes are 0, but 4-8 bytes are left
303+
// unspecified.
304+
//
305+
// 2. It only mentions padding for empty initializer, but doesn't mention
306+
// padding for a non empty initialization list. And if the aggregation or
307+
// union contains elements or members that are aggregates or unions, and
308+
// some are non empty initializers, while others are empty initializers,
309+
// the padding initialization is unclear. An example is:
310+
// struct S1 { int a; long long b; };
311+
// struct S2 { char c; struct S1 s1; };
312+
// // The values for paddings between s2.c and s2.s1.a, between s2.s1.a
313+
// and s2.s1.b are unclear.
314+
// struct S2 s2 = { 'c' };
315+
//
316+
// Here we choose to zero initiailize left bytes of a union type because
317+
// projects like the Linux kernel are relying on this behavior. If we don't
318+
// explicitly zero initialize them, the undef values can be optimized to
319+
// return garbage data. We also choose to zero initialize paddings for
320+
// aggregates and unions, no matter they are initialized by empty
321+
// initializers or non empty initializers. This can provide a consistent
322+
// behavior. So projects like the Linux kernel can rely on it.
323+
return !getLangOpts().CPlusPlus;
324+
}
325+
276326
/// Return the mlir::Value for the address of the given global variable.
277327
/// If Ty is non-null and if the global doesn't exist, then it will be created
278328
/// with the specified type instead of whatever the normal requested type

clang/test/CIR/CodeGen/bitfields.c

+3-3
Original file line numberDiff line numberDiff line change
@@ -59,12 +59,12 @@ typedef struct {
5959
// CHECK: !ty_G = !cir.struct<struct "G" {!u16i, !s32i} #cir.record.decl.ast>
6060
// CHECK: !ty_T = !cir.struct<struct "T" {!u8i, !u32i} #cir.record.decl.ast>
6161
// CHECK: !ty_anon2E0_ = !cir.struct<struct "anon.0" {!u32i} #cir.record.decl.ast>
62-
// CHECK: !ty_anon_struct = !cir.struct<struct {!u8i, !u8i, !s32i}>
6362
// CHECK: #bfi_a = #cir.bitfield_info<name = "a", storage_type = !u8i, size = 3, offset = 0, is_signed = true>
6463
// CHECK: #bfi_e = #cir.bitfield_info<name = "e", storage_type = !u16i, size = 15, offset = 0, is_signed = true>
6564
// CHECK: !ty_S = !cir.struct<struct "S" {!u32i, !cir.array<!u8i x 3>, !u16i, !u32i}>
6665
// CHECK: !ty_U = !cir.struct<struct "U" {!s8i, !s8i, !s8i, !cir.array<!u8i x 9>}>
6766
// CHECK: !ty___long = !cir.struct<struct "__long" {!ty_anon2E0_, !u32i, !cir.ptr<!u32i>}>
67+
// CHECK: !ty_anon_struct = !cir.struct<struct {!u8i, !u8i, !cir.array<!u8i x 2>, !s32i}>
6868
// CHECK: #bfi_d = #cir.bitfield_info<name = "d", storage_type = !cir.array<!u8i x 3>, size = 2, offset = 17, is_signed = true>
6969

7070
// CHECK: cir.func {{.*@store_field}}
@@ -127,7 +127,7 @@ void createU() {
127127
// CHECK: cir.func {{.*@createD}}
128128
// CHECK: %0 = cir.alloca !ty_D, !cir.ptr<!ty_D>, ["d"] {alignment = 4 : i64}
129129
// CHECK: %1 = cir.cast(bitcast, %0 : !cir.ptr<!ty_D>), !cir.ptr<!ty_anon_struct>
130-
// CHECK: %2 = cir.const #cir.const_struct<{#cir.int<33> : !u8i, #cir.int<0> : !u8i, #cir.int<3> : !s32i}> : !ty_anon_struct
130+
// CHECK: %2 = cir.const #cir.const_struct<{#cir.int<33> : !u8i, #cir.int<0> : !u8i, #cir.const_array<[#cir.zero : !u8i, #cir.zero : !u8i]> : !cir.array<!u8i x 2>, #cir.int<3> : !s32i}> : !ty_anon_struct
131131
// CHECK: cir.store %2, %1 : !ty_anon_struct, !cir.ptr<!ty_anon_struct>
132132
void createD() {
133133
D d = {1,2,3};
@@ -148,7 +148,7 @@ typedef struct {
148148
int y ;
149149
} G;
150150

151-
// CHECK: cir.global external @g = #cir.const_struct<{#cir.int<133> : !u8i, #cir.int<127> : !u8i, #cir.int<254> : !s32i}> : !ty_anon_struct
151+
// CHECK: cir.global external @g = #cir.const_struct<{#cir.int<133> : !u8i, #cir.int<127> : !u8i, #cir.const_array<[#cir.zero : !u8i, #cir.zero : !u8i]> : !cir.array<!u8i x 2>, #cir.int<254> : !s32i}> : !ty_anon_struct
152152
G g = { -123, 254UL};
153153

154154
// CHECK: cir.func {{.*@get_y}}

clang/test/CIR/CodeGen/const-bitfields.c

+2-2
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,13 @@ struct Inner {
1414
unsigned d : 30;
1515
};
1616

17-
// CHECK: !ty_anon_struct = !cir.struct<struct {!u8i, !u8i, !u8i, !s32i}>
17+
// CHECK: !ty_anon_struct = !cir.struct<struct {!u8i, !u8i, !u8i, !u8i, !s32i}>
1818
// CHECK: !ty_T = !cir.struct<struct "T" {!cir.array<!u8i x 3>, !s32i} #cir.record.decl.ast>
1919
// CHECK: !ty_anon_struct1 = !cir.struct<struct {!u8i, !cir.array<!u8i x 3>, !u8i, !u8i, !u8i, !u8i}>
2020
// CHECK: #bfi_Z = #cir.bitfield_info<name = "Z", storage_type = !cir.array<!u8i x 3>, size = 9, offset = 11, is_signed = true>
2121

2222
struct T GV = { 1, 5, 26, 42 };
23-
// CHECK: cir.global external @GV = #cir.const_struct<{#cir.int<161> : !u8i, #cir.int<208> : !u8i, #cir.int<0> : !u8i, #cir.int<42> : !s32i}> : !ty_anon_struct
23+
// CHECK: cir.global external @GV = #cir.const_struct<{#cir.int<161> : !u8i, #cir.int<208> : !u8i, #cir.int<0> : !u8i, #cir.zero : !u8i, #cir.int<42> : !s32i}> : !ty_anon_struct
2424

2525
// check padding is used (const array of zeros)
2626
struct Inner var = { 1, 0, 1, 21};

clang/test/CIR/CodeGen/struct.c

+3-2
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,9 @@ void shouldConstInitStructs(void) {
3838
// CHECK: cir.func @shouldConstInitStructs
3939
struct Foo f = {1, 2, {3, 4}};
4040
// CHECK: %[[#V0:]] = cir.alloca !ty_Foo, !cir.ptr<!ty_Foo>, ["f"] {alignment = 4 : i64}
41-
// CHECK: %[[#V1:]] = cir.const #cir.const_struct<{#cir.int<1> : !s32i, #cir.int<2> : !s8i, #cir.const_struct<{#cir.int<3> : !s32i, #cir.int<4> : !s8i}> : !ty_Bar}> : !ty_Foo
42-
// CHECK: cir.store %[[#V1]], %[[#V0]] : !ty_Foo, !cir.ptr<!ty_Foo>
41+
// CHECK: %[[#V1:]] = cir.cast(bitcast, %[[#V0]] : !cir.ptr<!ty_Foo>), !cir.ptr<!ty_anon_struct> l
42+
// CHECK: %[[#V2:]] = cir.const #cir.const_struct<{#cir.int<1> : !s32i, #cir.int<2> : !s8i, #cir.const_array<[#cir.zero : !u8i, #cir.zero : !u8i, #cir.zero : !u8i]> : !cir.array<!u8i x 3>, #cir.const_struct<{#cir.int<3> : !s32i, #cir.int<4> : !s8i}> : !ty_Bar}> : !ty_anon_struct
43+
// CHECK: cir.store %[[#V2]], %[[#V1]] : !ty_anon_struct, !cir.ptr<!ty_anon_struct>
4344
}
4445

4546
// Should zero-initialize uninitialized global structs.

0 commit comments

Comments
 (0)