Skip to content

Commit bbed149

Browse files
gitoleglanza
authored and committed
[CIR][CodeGen] Const structs with bitfields (llvm#412)
This PR adds support for const structs with bitfields. For now only global structs are supported; support for local ones can be added fairly easily, although there is one ugly thing that needs to be done.

So, what is this all about? First of all, as usual, I'm sorry for the big PR, but it's hard to break it down into pieces. The good news is that it is, at the same time, a copy-paste from the original codegen, so there are no surprises here. Basically, the hardest place to read is `ConstantAggregateBuilder::addBits`, copied with a minimum of changes.

The main problem - and frankly speaking I have no idea why it's done this way in the original codegen - is that the data layout is different for such structures; I mean that literally another type is used. For instance, the code:

```
struct T {
  int X : 15;
  int Y : 6;
  unsigned Z : 9;
  int W;
};
struct T GV = { 1, 5, 256, -1};
```

is represented in LLVM IR (with no CIR enabled) as:

```
%struct.T = type { i32, i32 }
%struct.Inner = type { i8, i32 }
@gv = dso_local global { i8, i8, i8, i8, i32 } ...
```

i.e. the global var `GV` looks like a struct of single bytes (up to the last field, which is not a bitfield). My guess is that we want to have the same behavior in CIR, so we do.

The main problem is that we have to treat the same data differently, and this is why one additional `bitcast` is needed when we create a global var. Actually, there was a comment there, and I really wonder where it came from. Anyway, I don't really like this and don't see any good workaround here. Well, maybe we could add a kind of map to store the correspondence between types and do the bitcast more wisely. The same is true for const structs with bitfields defined locally.
1 parent ef3992b commit bbed149

File tree

9 files changed

+271
-20
lines changed

9 files changed

+271
-20
lines changed

clang/include/clang/CIR/Dialect/IR/CIRAttrs.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,7 @@ def IntAttr : CIR_Attr<"Int", "int", [TypedAttrInterface]> {
209209
int64_t getSInt() const { return getValue().getSExtValue(); }
210210
uint64_t getUInt() const { return getValue().getZExtValue(); }
211211
bool isNullValue() const { return getValue() == 0; }
212+
uint64_t getBitWidth() const { return getType().cast<IntType>().getWidth(); }
212213
}];
213214
let genVerifyDecl = 1;
214215
let hasCustomAssemblyFormat = 1;

clang/include/clang/CIR/Dialect/IR/CIRTypes.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,8 @@ class StructType
169169
uint64_t getPreferredAlignment(const DataLayout &dataLayout,
170170
DataLayoutEntryListRef params) const;
171171

172+
bool isLayoutIdentical(const StructType &other);
173+
172174
// Utilities for lazily computing and caching data layout info.
173175
private:
174176
mutable Type largestMember{};

clang/lib/CIR/CodeGen/CIRDataLayout.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ class CIRDataLayout {
2626
mlir::DataLayout layout;
2727

2828
CIRDataLayout(mlir::ModuleOp modOp);
29-
bool isBigEndian() { return bigEndian; }
29+
bool isBigEndian() const { return bigEndian; }
3030

3131
// `useABI` is `true` if not using preferred alignment.
3232
unsigned getAlignment(mlir::Type ty, bool useABI) const {

clang/lib/CIR/CodeGen/CIRGenBuilder.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -470,6 +470,22 @@ class CIRGenBuilderTy : public CIRBaseBuilderTy {
470470
return type;
471471
}
472472

473+
mlir::cir::StructType
474+
getCompleteStructType(mlir::ArrayAttr fields, bool packed = false,
475+
llvm::StringRef name = "",
476+
const clang::RecordDecl *ast = nullptr) {
477+
llvm::SmallVector<mlir::Type, 8> members;
478+
for (auto &attr : fields) {
479+
const auto typedAttr = attr.dyn_cast<mlir::TypedAttr>();
480+
members.push_back(typedAttr.getType());
481+
}
482+
483+
if (name.empty())
484+
return getAnonStructTy(members, packed, ast);
485+
else
486+
return getCompleteStructTy(members, name, packed, ast);
487+
}
488+
473489
mlir::cir::ArrayType getArrayType(mlir::Type eltType, unsigned size) {
474490
return mlir::cir::ArrayType::get(getContext(), eltType, size);
475491
}

clang/lib/CIR/CodeGen/CIRGenExpr.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -699,9 +699,15 @@ static LValue buildGlobalVarDeclLValue(CIRGenFunction &CGF, const Expr *E,
699699
llvm_unreachable("not implemented");
700700

701701
auto V = CGF.CGM.getAddrOfGlobalVar(VD);
702+
703+
if (VD->getTLSKind() != VarDecl::TLS_None)
704+
llvm_unreachable("NYI");
705+
702706
auto RealVarTy = CGF.getTypes().convertTypeForMem(VD->getType());
703-
// TODO(cir): do we need this for CIR?
704-
// V = EmitBitCastOfLValueToProperType(CGF, V, RealVarTy);
707+
auto realPtrTy = CGF.getBuilder().getPointerTo(RealVarTy);
708+
if (realPtrTy != V.getType())
709+
V = CGF.getBuilder().createBitcast(V.getLoc(), V, realPtrTy);
710+
705711
CharUnits Alignment = CGF.getContext().getDeclAlign(VD);
706712
Address Addr(V, RealVarTy, Alignment);
707713
// Emit reference to the private copy of the variable if it is an OpenMP

clang/lib/CIR/CodeGen/CIRGenExprConst.cpp

Lines changed: 183 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -67,8 +67,13 @@ struct ConstantAggregateBuilderUtils {
6767
return getSize(C.getType());
6868
}
6969

70-
mlir::Attribute getPadding(CharUnits PadSize) const {
71-
llvm_unreachable("NYI");
70+
mlir::TypedAttr getPadding(CharUnits size) const {
71+
auto eltTy = CGM.UCharTy;
72+
auto arSize = size.getQuantity();
73+
auto &bld = CGM.getBuilder();
74+
SmallVector<mlir::Attribute, 4> elts(arSize, bld.getZeroAttr(eltTy));
75+
return bld.getConstArray(mlir::ArrayAttr::get(bld.getContext(), elts),
76+
bld.getArrayType(eltTy, arSize));
7277
}
7378

7479
mlir::Attribute getZeroes(CharUnits ZeroSize) const {
@@ -186,7 +191,111 @@ bool ConstantAggregateBuilder::add(mlir::Attribute A, CharUnits Offset,
186191

187192
bool ConstantAggregateBuilder::addBits(llvm::APInt Bits, uint64_t OffsetInBits,
188193
bool AllowOverwrite) {
189-
llvm_unreachable("NYI");
194+
const ASTContext &Context = CGM.getASTContext();
195+
const uint64_t CharWidth = CGM.getASTContext().getCharWidth();
196+
auto charTy = CGM.getBuilder().getUIntNTy(CharWidth);
197+
// Offset of where we want the first bit to go within the bits of the
198+
// current char.
199+
unsigned OffsetWithinChar = OffsetInBits % CharWidth;
200+
201+
// We split bit-fields up into individual bytes. Walk over the bytes and
202+
// update them.
203+
for (CharUnits OffsetInChars =
204+
Context.toCharUnitsFromBits(OffsetInBits - OffsetWithinChar);
205+
/**/; ++OffsetInChars) {
206+
// Number of bits we want to fill in this char.
207+
unsigned WantedBits =
208+
std::min((uint64_t)Bits.getBitWidth(), CharWidth - OffsetWithinChar);
209+
210+
// Get a char containing the bits we want in the right places. The other
211+
// bits have unspecified values.
212+
llvm::APInt BitsThisChar = Bits;
213+
if (BitsThisChar.getBitWidth() < CharWidth)
214+
BitsThisChar = BitsThisChar.zext(CharWidth);
215+
if (CGM.getDataLayout().isBigEndian()) {
216+
// Figure out how much to shift by. We may need to left-shift if we have
217+
// less than one byte of Bits left.
218+
int Shift = Bits.getBitWidth() - CharWidth + OffsetWithinChar;
219+
if (Shift > 0)
220+
BitsThisChar.lshrInPlace(Shift);
221+
else if (Shift < 0)
222+
BitsThisChar = BitsThisChar.shl(-Shift);
223+
} else {
224+
BitsThisChar = BitsThisChar.shl(OffsetWithinChar);
225+
}
226+
if (BitsThisChar.getBitWidth() > CharWidth)
227+
BitsThisChar = BitsThisChar.trunc(CharWidth);
228+
229+
if (WantedBits == CharWidth) {
230+
// Got a full byte: just add it directly.
231+
add(mlir::cir::IntAttr::get(charTy, BitsThisChar), OffsetInChars,
232+
AllowOverwrite);
233+
} else {
234+
// Partial byte: update the existing integer if there is one. If we
235+
// can't split out a 1-CharUnit range to update, then we can't add
236+
// these bits and fail the entire constant emission.
237+
std::optional<size_t> FirstElemToUpdate = splitAt(OffsetInChars);
238+
if (!FirstElemToUpdate)
239+
return false;
240+
std::optional<size_t> LastElemToUpdate =
241+
splitAt(OffsetInChars + CharUnits::One());
242+
if (!LastElemToUpdate)
243+
return false;
244+
assert(*LastElemToUpdate - *FirstElemToUpdate < 2 &&
245+
"should have at most one element covering one byte");
246+
247+
// Figure out which bits we want and discard the rest.
248+
llvm::APInt UpdateMask(CharWidth, 0);
249+
if (CGM.getDataLayout().isBigEndian())
250+
UpdateMask.setBits(CharWidth - OffsetWithinChar - WantedBits,
251+
CharWidth - OffsetWithinChar);
252+
else
253+
UpdateMask.setBits(OffsetWithinChar, OffsetWithinChar + WantedBits);
254+
BitsThisChar &= UpdateMask;
255+
bool isNull = false;
256+
if (*FirstElemToUpdate < Elems.size()) {
257+
auto firstEltToUpdate =
258+
dyn_cast<mlir::cir::IntAttr>(Elems[*FirstElemToUpdate]);
259+
isNull = firstEltToUpdate && firstEltToUpdate.isNullValue();
260+
}
261+
262+
if (*FirstElemToUpdate == *LastElemToUpdate || isNull) {
263+
// All existing bits are either zero or undef.
264+
add(CGM.getBuilder().getAttr<mlir::cir::IntAttr>(charTy, BitsThisChar),
265+
OffsetInChars, /*AllowOverwrite*/ true);
266+
} else {
267+
mlir::cir::IntAttr CI =
268+
dyn_cast<mlir::cir::IntAttr>(Elems[*FirstElemToUpdate]);
269+
// In order to perform a partial update, we need the existing bitwise
270+
// value, which we can only extract for a constant int.
271+
// auto *CI = dyn_cast<llvm::ConstantInt>(ToUpdate);
272+
if (!CI)
273+
return false;
274+
// Because this is a 1-CharUnit range, the constant occupying it must
275+
// be exactly one CharUnit wide.
276+
assert(CI.getBitWidth() == CharWidth && "splitAt failed");
277+
assert((!(CI.getValue() & UpdateMask) || AllowOverwrite) &&
278+
"unexpectedly overwriting bitfield");
279+
BitsThisChar |= (CI.getValue() & ~UpdateMask);
280+
Elems[*FirstElemToUpdate] =
281+
CGM.getBuilder().getAttr<mlir::cir::IntAttr>(charTy, BitsThisChar);
282+
}
283+
}
284+
285+
// Stop if we've added all the bits.
286+
if (WantedBits == Bits.getBitWidth())
287+
break;
288+
289+
// Remove the consumed bits from Bits.
290+
if (!CGM.getDataLayout().isBigEndian())
291+
Bits.lshrInPlace(WantedBits);
292+
Bits = Bits.trunc(Bits.getBitWidth() - WantedBits);
293+
294+
// The remaining bits go at the start of the following bytes.
295+
OffsetWithinChar = 0;
296+
}
297+
298+
return true;
190299
}
191300

192301
/// Returns a position within Elems and Offsets such that all elements
@@ -236,6 +345,7 @@ mlir::Attribute ConstantAggregateBuilder::buildFrom(
236345

237346
if (Elems.empty())
238347
return {};
348+
auto Offset = [&](size_t I) { return Offsets[I] - StartOffset; };
239349

240350
// If we want an array type, see if all the elements are the same type and
241351
// appropriately spaced.
@@ -276,14 +386,44 @@ mlir::Attribute ConstantAggregateBuilder::buildFrom(
276386
// as a non-packed struct and do so opportunistically if possible.
277387
llvm::SmallVector<mlir::Attribute, 32> PackedElems;
278388
if (!NaturalLayout) {
279-
llvm_unreachable("NYI");
389+
CharUnits SizeSoFar = CharUnits::Zero();
390+
for (size_t I = 0; I != Elems.size(); ++I) {
391+
mlir::TypedAttr C = Elems[I].dyn_cast<mlir::TypedAttr>();
392+
assert(C && "expected typed attribute");
393+
394+
CharUnits Align = Utils.getAlignment(C);
395+
CharUnits NaturalOffset = SizeSoFar.alignTo(Align);
396+
CharUnits DesiredOffset = Offset(I);
397+
assert(DesiredOffset >= SizeSoFar && "elements out of order");
398+
399+
if (DesiredOffset != NaturalOffset)
400+
Packed = true;
401+
if (DesiredOffset != SizeSoFar)
402+
PackedElems.push_back(Utils.getPadding(DesiredOffset - SizeSoFar));
403+
PackedElems.push_back(Elems[I]);
404+
SizeSoFar = DesiredOffset + Utils.getSize(C);
405+
}
406+
// If we're using the packed layout, pad it out to the desired size if
407+
// necessary.
408+
if (Packed) {
409+
assert(SizeSoFar <= DesiredSize &&
410+
"requested size is too small for contents");
411+
412+
if (SizeSoFar < DesiredSize)
413+
PackedElems.push_back(Utils.getPadding(DesiredSize - SizeSoFar));
414+
}
280415
}
281416

282-
// TODO(cir): emit a #cir.zero if all elements are null values.
283417
auto &builder = CGM.getBuilder();
284418
auto arrAttr = mlir::ArrayAttr::get(builder.getContext(),
285419
Packed ? PackedElems : UnpackedElems);
286-
return builder.getConstStructOrZeroAttr(arrAttr, Packed, DesiredTy);
420+
auto strType = builder.getCompleteStructType(arrAttr, Packed);
421+
422+
if (auto desired = dyn_cast<mlir::cir::StructType>(DesiredTy))
423+
if (desired.isLayoutIdentical(strType))
424+
strType = desired;
425+
426+
return builder.getConstStructOrZeroAttr(arrAttr, Packed, strType);
287427
}
288428

289429
void ConstantAggregateBuilder::condense(CharUnits Offset,
@@ -353,7 +493,7 @@ class ConstStructBuilder {
353493
bool AllowOverwrite = false);
354494

355495
bool AppendBitField(const FieldDecl *Field, uint64_t FieldOffset,
356-
mlir::IntegerAttr InitExpr, bool AllowOverwrite = false);
496+
mlir::cir::IntAttr InitExpr, bool AllowOverwrite = false);
357497

358498
bool Build(InitListExpr *ILE, bool AllowOverwrite);
359499
bool Build(const APValue &Val, const RecordDecl *RD, bool IsPrimaryBase,
@@ -380,9 +520,26 @@ bool ConstStructBuilder::AppendBytes(CharUnits FieldOffsetInChars,
380520

381521
bool ConstStructBuilder::AppendBitField(const FieldDecl *Field,
382522
uint64_t FieldOffset,
383-
mlir::IntegerAttr CI,
523+
mlir::cir::IntAttr CI,
384524
bool AllowOverwrite) {
385-
llvm_unreachable("NYI");
525+
const auto &RL = CGM.getTypes().getCIRGenRecordLayout(Field->getParent());
526+
const auto &Info = RL.getBitFieldInfo(Field);
527+
llvm::APInt FieldValue = CI.getValue();
528+
529+
// Promote the size of FieldValue if necessary
530+
// FIXME: This should never occur, but currently it can because initializer
531+
// constants are cast to bool, and because clang is not enforcing bitfield
532+
// width limits.
533+
if (Info.Size > FieldValue.getBitWidth())
534+
FieldValue = FieldValue.zext(Info.Size);
535+
536+
// Truncate the size of FieldValue to the bit field size.
537+
if (Info.Size < FieldValue.getBitWidth())
538+
FieldValue = FieldValue.trunc(Info.Size);
539+
540+
return Builder.addBits(FieldValue,
541+
CGM.getASTContext().toBits(StartOffset) + FieldOffset,
542+
AllowOverwrite);
386543
}
387544

388545
static bool EmitDesignatedInitUpdater(ConstantEmitter &Emitter,
@@ -513,7 +670,16 @@ bool ConstStructBuilder::Build(InitListExpr *ILE, bool AllowOverwrite) {
513670
if (Field->hasAttr<NoUniqueAddressAttr>())
514671
AllowOverwrite = true;
515672
} else {
516-
llvm_unreachable("NYI");
673+
// Otherwise we have a bitfield.
674+
if (auto constInt = dyn_cast<mlir::cir::IntAttr>(EltInit)) {
675+
if (!AppendBitField(Field, Layout.getFieldOffset(FieldNo), constInt,
676+
AllowOverwrite))
677+
return false;
678+
} else {
679+
// We are trying to initialize a bitfield with a non-trivial constant,
680+
// this must require run-time code.
681+
return false;
682+
}
517683
}
518684
}
519685

@@ -994,9 +1160,13 @@ buildArrayConstant(CIRGenModule &CGM, mlir::Type DesiredType,
9941160
ArrayBound));
9951161
}
9961162

997-
// We have mixed types. Use a packed struct.
998-
assert(0 && "NYE");
999-
return {};
1163+
SmallVector<mlir::Attribute, 4> Eles;
1164+
Eles.reserve(Elements.size());
1165+
for (auto const &Element : Elements)
1166+
Eles.push_back(Element);
1167+
1168+
auto arrAttr = mlir::ArrayAttr::get(builder.getContext(), Eles);
1169+
return builder.getAnonConstStruct(arrAttr, false);
10001170
}
10011171

10021172
} // end anonymous namespace.

clang/lib/CIR/CodeGen/CIRGenModule.cpp

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -653,11 +653,10 @@ CIRGenModule::getOrCreateCIRGlobal(StringRef MangledName, mlir::Type Ty,
653653
// TODO(cir): LLVM codegen makes sure the result is of the correct type
654654
// by issuing a address space cast.
655655

656-
// TODO(cir):
657-
// (In LLVM codgen, if global is requested for a definition, we always need
658-
// to create a new global, otherwise return a bitcast.)
656+
// (If global is requested for a definition, we always need to create a new
657+
// global, not just return a bitcast.)
659658
if (!IsForDefinition)
660-
assert(0 && "not implemented");
659+
return Entry;
661660
}
662661

663662
// TODO(cir): auto DAddrSpace = GetGlobalVarAddressSpace(D);

clang/lib/CIR/Dialect/IR/CIRTypes.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -350,6 +350,16 @@ void StructType::complete(ArrayRef<Type> members, bool packed,
350350
llvm_unreachable("failed to complete struct");
351351
}
352352

353+
bool StructType::isLayoutIdentical(const StructType &other) {
354+
if (getImpl() == other.getImpl())
355+
return true;
356+
357+
if (getPacked() != other.getPacked())
358+
return false;
359+
360+
return getMembers() == other.getMembers();
361+
}
362+
353363
//===----------------------------------------------------------------------===//
354364
// Data Layout information for types
355365
//===----------------------------------------------------------------------===//

0 commit comments

Comments
 (0)