[CIR][CodeGen] Const structs with bitfields (llvm#412)
This PR adds support for const structs with bitfields.
For now only global structs are supported; support for local ones can be
added more or less easily, though there is one ugly thing that needs to be
done first.

So, what is this all about?
First of all - as usual, I'm sorry for the big PR, but it's hard to break
it down into pieces. The good news is that at the same time it's basically
a copy-paste from the original codegen, so no surprises here. The hardest
place to read is `ConstantAggregateBuilder::addBits`, copied with a minimum
of changes.

The main problem - and frankly speaking I have no idea why it's done
this way in the original codegen - is that the data layout is different
for such structures: literally another type is used. For instance, the
code:
```
struct T {
  int X : 15;
  int Y : 6;
  unsigned Z : 9;
  int W;
};

struct T GV = { 1, 5, 256, -1};
```
is represented in LLVM IR (with no CIR enabled) as:

```
%struct.T = type { i32, i32 }
%struct.Inner = type { i8, i32 }

@gv = dso_local global { i8, i8, i8, i8, i32 } ...
```
i.e. the global var `GV` looks like a struct of single bytes (up to the
last field, which is not a bitfield).
My guess is that we want the same behavior in CIR - so we do.
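
To make the layout concrete, here is a small standalone illustration (not
part of the patch) of how those initializer values pack into the single
bytes, assuming the usual Itanium-style bitfield layout on a little-endian
target:

```
// Standalone illustration, not part of the patch: reproduces the
// byte-by-byte packing that ConstantAggregateBuilder::addBits performs for
// the example above, assuming a little-endian target.
#include <cstdint>
#include <cstdio>

int main() {
  uint32_t storage = 0;
  storage |= (1u & 0x7FFFu);        // X : 15 bits at bit offset 0
  storage |= (5u & 0x3Fu) << 15;    // Y : 6 bits at bit offset 15
  storage |= (256u & 0x1FFu) << 21; // Z : 9 bits at bit offset 21
  for (unsigned i = 0; i < 4; ++i)  // one i8 element per storage byte
    printf("byte %u: 0x%02X\n", i,
           (unsigned)((storage >> (8 * i)) & 0xFFu));
  // Should print 0x01 0x80 0x02 0x20, i.e. the first four elements of the
  // global become { i8 1, i8 -128, i8 2, i8 32 }, followed by i32 -1 for W.
}
```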

The other problem is that we have to treat the same data differently -
and this is why one additional `bitcast` is needed when we create a
global var. Actually, there was already a comment there - and I really
wonder where it came from. Anyway, I don't really like it, but I don't see
any good workaround here. Maybe we could add a kind of map to store the
correspondence between types and do the bitcast more wisely (a rough sketch
of that idea follows below). The same is true for const structs with
bitfields defined locally.
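
To make the "map" idea slightly more concrete, here is a rough sketch of
what I have in mind (nothing in this PR implements it, and the helper and
cache names below are made up):

```
// Hypothetical sketch only, not part of this PR: remember which CIR type
// each global was actually emitted with, keyed by its mangled name, and
// bitcast at a use site only when that type differs from the type the
// frontend expects for the variable.
llvm::StringMap<mlir::Type> emittedGlobalTypes; // hypothetical cache

mlir::Value castGlobalIfNeeded(CIRGenBuilderTy &builder, mlir::Location loc,
                               mlir::Value addr, mlir::Type expectedPtrTy) {
  if (addr.getType() == expectedPtrTy)
    return addr; // layouts agree, no cast needed
  return builder.createBitcast(loc, addr, expectedPtrTy);
}
```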
gitoleg authored and lanza committed Jun 20, 2024
1 parent ef3992b commit bbed149
Showing 9 changed files with 271 additions and 20 deletions.
1 change: 1 addition & 0 deletions clang/include/clang/CIR/Dialect/IR/CIRAttrs.td
@@ -209,6 +209,7 @@ def IntAttr : CIR_Attr<"Int", "int", [TypedAttrInterface]> {
int64_t getSInt() const { return getValue().getSExtValue(); }
uint64_t getUInt() const { return getValue().getZExtValue(); }
bool isNullValue() const { return getValue() == 0; }
uint64_t getBitWidth() const { return getType().cast<IntType>().getWidth(); }
}];
let genVerifyDecl = 1;
let hasCustomAssemblyFormat = 1;
2 changes: 2 additions & 0 deletions clang/include/clang/CIR/Dialect/IR/CIRTypes.h
@@ -169,6 +169,8 @@ class StructType
uint64_t getPreferredAlignment(const DataLayout &dataLayout,
DataLayoutEntryListRef params) const;

bool isLayoutIdentical(const StructType &other);

// Utilities for lazily computing and cacheing data layout info.
private:
mutable Type largestMember{};
2 changes: 1 addition & 1 deletion clang/lib/CIR/CodeGen/CIRDataLayout.h
@@ -26,7 +26,7 @@ class CIRDataLayout {
mlir::DataLayout layout;

CIRDataLayout(mlir::ModuleOp modOp);
bool isBigEndian() { return bigEndian; }
bool isBigEndian() const { return bigEndian; }

// `useABI` is `true` if not using prefered alignment.
unsigned getAlignment(mlir::Type ty, bool useABI) const {
16 changes: 16 additions & 0 deletions clang/lib/CIR/CodeGen/CIRGenBuilder.h
@@ -470,6 +470,22 @@ class CIRGenBuilderTy : public CIRBaseBuilderTy {
return type;
}

mlir::cir::StructType
getCompleteStructType(mlir::ArrayAttr fields, bool packed = false,
llvm::StringRef name = "",
const clang::RecordDecl *ast = nullptr) {
llvm::SmallVector<mlir::Type, 8> members;
for (auto &attr : fields) {
const auto typedAttr = attr.dyn_cast<mlir::TypedAttr>();
members.push_back(typedAttr.getType());
}

if (name.empty())
return getAnonStructTy(members, packed, ast);
else
return getCompleteStructTy(members, name, packed, ast);
}

mlir::cir::ArrayType getArrayType(mlir::Type eltType, unsigned size) {
return mlir::cir::ArrayType::get(getContext(), eltType, size);
}
10 changes: 8 additions & 2 deletions clang/lib/CIR/CodeGen/CIRGenExpr.cpp
@@ -699,9 +699,15 @@ static LValue buildGlobalVarDeclLValue(CIRGenFunction &CGF, const Expr *E,
llvm_unreachable("not implemented");

auto V = CGF.CGM.getAddrOfGlobalVar(VD);

if (VD->getTLSKind() != VarDecl::TLS_None)
llvm_unreachable("NYI");

auto RealVarTy = CGF.getTypes().convertTypeForMem(VD->getType());
// TODO(cir): do we need this for CIR?
// V = EmitBitCastOfLValueToProperType(CGF, V, RealVarTy);
auto realPtrTy = CGF.getBuilder().getPointerTo(RealVarTy);
if (realPtrTy != V.getType())
V = CGF.getBuilder().createBitcast(V.getLoc(), V, realPtrTy);

CharUnits Alignment = CGF.getContext().getDeclAlign(VD);
Address Addr(V, RealVarTy, Alignment);
// Emit reference to the private copy of the variable if it is an OpenMP
196 changes: 183 additions & 13 deletions clang/lib/CIR/CodeGen/CIRGenExprConst.cpp
@@ -67,8 +67,13 @@ struct ConstantAggregateBuilderUtils {
return getSize(C.getType());
}

mlir::Attribute getPadding(CharUnits PadSize) const {
llvm_unreachable("NYI");
mlir::TypedAttr getPadding(CharUnits size) const {
auto eltTy = CGM.UCharTy;
auto arSize = size.getQuantity();
auto &bld = CGM.getBuilder();
SmallVector<mlir::Attribute, 4> elts(arSize, bld.getZeroAttr(eltTy));
return bld.getConstArray(mlir::ArrayAttr::get(bld.getContext(), elts),
bld.getArrayType(eltTy, arSize));
}

mlir::Attribute getZeroes(CharUnits ZeroSize) const {
@@ -186,7 +191,111 @@ bool ConstantAggregateBuilder::add(mlir::Attribute A, CharUnits Offset,

bool ConstantAggregateBuilder::addBits(llvm::APInt Bits, uint64_t OffsetInBits,
bool AllowOverwrite) {
llvm_unreachable("NYI");
const ASTContext &Context = CGM.getASTContext();
const uint64_t CharWidth = CGM.getASTContext().getCharWidth();
auto charTy = CGM.getBuilder().getUIntNTy(CharWidth);
// Offset of where we want the first bit to go within the bits of the
// current char.
unsigned OffsetWithinChar = OffsetInBits % CharWidth;

// We split bit-fields up into individual bytes. Walk over the bytes and
// update them.
for (CharUnits OffsetInChars =
Context.toCharUnitsFromBits(OffsetInBits - OffsetWithinChar);
/**/; ++OffsetInChars) {
// Number of bits we want to fill in this char.
unsigned WantedBits =
std::min((uint64_t)Bits.getBitWidth(), CharWidth - OffsetWithinChar);

// Get a char containing the bits we want in the right places. The other
// bits have unspecified values.
llvm::APInt BitsThisChar = Bits;
if (BitsThisChar.getBitWidth() < CharWidth)
BitsThisChar = BitsThisChar.zext(CharWidth);
if (CGM.getDataLayout().isBigEndian()) {
// Figure out how much to shift by. We may need to left-shift if we have
// less than one byte of Bits left.
int Shift = Bits.getBitWidth() - CharWidth + OffsetWithinChar;
if (Shift > 0)
BitsThisChar.lshrInPlace(Shift);
else if (Shift < 0)
BitsThisChar = BitsThisChar.shl(-Shift);
} else {
BitsThisChar = BitsThisChar.shl(OffsetWithinChar);
}
if (BitsThisChar.getBitWidth() > CharWidth)
BitsThisChar = BitsThisChar.trunc(CharWidth);

if (WantedBits == CharWidth) {
// Got a full byte: just add it directly.
add(mlir::cir::IntAttr::get(charTy, BitsThisChar), OffsetInChars,
AllowOverwrite);
} else {
// Partial byte: update the existing integer if there is one. If we
// can't split out a 1-CharUnit range to update, then we can't add
// these bits and fail the entire constant emission.
std::optional<size_t> FirstElemToUpdate = splitAt(OffsetInChars);
if (!FirstElemToUpdate)
return false;
std::optional<size_t> LastElemToUpdate =
splitAt(OffsetInChars + CharUnits::One());
if (!LastElemToUpdate)
return false;
assert(*LastElemToUpdate - *FirstElemToUpdate < 2 &&
"should have at most one element covering one byte");

// Figure out which bits we want and discard the rest.
llvm::APInt UpdateMask(CharWidth, 0);
if (CGM.getDataLayout().isBigEndian())
UpdateMask.setBits(CharWidth - OffsetWithinChar - WantedBits,
CharWidth - OffsetWithinChar);
else
UpdateMask.setBits(OffsetWithinChar, OffsetWithinChar + WantedBits);
BitsThisChar &= UpdateMask;
bool isNull = false;
if (*FirstElemToUpdate < Elems.size()) {
auto firstEltToUpdate =
dyn_cast<mlir::cir::IntAttr>(Elems[*FirstElemToUpdate]);
isNull = firstEltToUpdate && firstEltToUpdate.isNullValue();
}

if (*FirstElemToUpdate == *LastElemToUpdate || isNull) {
// All existing bits are either zero or undef.
add(CGM.getBuilder().getAttr<mlir::cir::IntAttr>(charTy, BitsThisChar),
OffsetInChars, /*AllowOverwrite*/ true);
} else {
mlir::cir::IntAttr CI =
dyn_cast<mlir::cir::IntAttr>(Elems[*FirstElemToUpdate]);
// In order to perform a partial update, we need the existing bitwise
// value, which we can only extract for a constant int.
// auto *CI = dyn_cast<llvm::ConstantInt>(ToUpdate);
if (!CI)
return false;
// Because this is a 1-CharUnit range, the constant occupying it must
// be exactly one CharUnit wide.
assert(CI.getBitWidth() == CharWidth && "splitAt failed");
assert((!(CI.getValue() & UpdateMask) || AllowOverwrite) &&
"unexpectedly overwriting bitfield");
BitsThisChar |= (CI.getValue() & ~UpdateMask);
Elems[*FirstElemToUpdate] =
CGM.getBuilder().getAttr<mlir::cir::IntAttr>(charTy, BitsThisChar);
}
}

// Stop if we've added all the bits.
if (WantedBits == Bits.getBitWidth())
break;

// Remove the consumed bits from Bits.
if (!CGM.getDataLayout().isBigEndian())
Bits.lshrInPlace(WantedBits);
Bits = Bits.trunc(Bits.getBitWidth() - WantedBits);

// The remanining bits go at the start of the following bytes.
OffsetWithinChar = 0;
}

return true;
}

/// Returns a position within Elems and Offsets such that all elements
@@ -236,6 +345,7 @@ mlir::Attribute ConstantAggregateBuilder::buildFrom(

if (Elems.empty())
return {};
auto Offset = [&](size_t I) { return Offsets[I] - StartOffset; };

// If we want an array type, see if all the elements are the same type and
// appropriately spaced.
@@ -276,14 +386,44 @@ mlir::Attribute ConstantAggregateBuilder::buildFrom(
// as a non-packed struct and do so opportunistically if possible.
llvm::SmallVector<mlir::Attribute, 32> PackedElems;
if (!NaturalLayout) {
llvm_unreachable("NYI");
CharUnits SizeSoFar = CharUnits::Zero();
for (size_t I = 0; I != Elems.size(); ++I) {
mlir::TypedAttr C = Elems[I].dyn_cast<mlir::TypedAttr>();
assert(C && "expected typed attribute");

CharUnits Align = Utils.getAlignment(C);
CharUnits NaturalOffset = SizeSoFar.alignTo(Align);
CharUnits DesiredOffset = Offset(I);
assert(DesiredOffset >= SizeSoFar && "elements out of order");

if (DesiredOffset != NaturalOffset)
Packed = true;
if (DesiredOffset != SizeSoFar)
PackedElems.push_back(Utils.getPadding(DesiredOffset - SizeSoFar));
PackedElems.push_back(Elems[I]);
SizeSoFar = DesiredOffset + Utils.getSize(C);
}
// If we're using the packed layout, pad it out to the desired size if
// necessary.
if (Packed) {
assert(SizeSoFar <= DesiredSize &&
"requested size is too small for contents");

if (SizeSoFar < DesiredSize)
PackedElems.push_back(Utils.getPadding(DesiredSize - SizeSoFar));
}
}

// TODO(cir): emit a #cir.zero if all elements are null values.
auto &builder = CGM.getBuilder();
auto arrAttr = mlir::ArrayAttr::get(builder.getContext(),
Packed ? PackedElems : UnpackedElems);
return builder.getConstStructOrZeroAttr(arrAttr, Packed, DesiredTy);
auto strType = builder.getCompleteStructType(arrAttr, Packed);

if (auto desired = dyn_cast<mlir::cir::StructType>(DesiredTy))
if (desired.isLayoutIdentical(strType))
strType = desired;

return builder.getConstStructOrZeroAttr(arrAttr, Packed, strType);
}

void ConstantAggregateBuilder::condense(CharUnits Offset,
@@ -353,7 +493,7 @@ class ConstStructBuilder {
bool AllowOverwrite = false);

bool AppendBitField(const FieldDecl *Field, uint64_t FieldOffset,
mlir::IntegerAttr InitExpr, bool AllowOverwrite = false);
mlir::cir::IntAttr InitExpr, bool AllowOverwrite = false);

bool Build(InitListExpr *ILE, bool AllowOverwrite);
bool Build(const APValue &Val, const RecordDecl *RD, bool IsPrimaryBase,
@@ -380,9 +520,26 @@ bool ConstStructBuilder::AppendBytes(CharUnits FieldOffsetInChars,

bool ConstStructBuilder::AppendBitField(const FieldDecl *Field,
uint64_t FieldOffset,
mlir::IntegerAttr CI,
mlir::cir::IntAttr CI,
bool AllowOverwrite) {
llvm_unreachable("NYI");
const auto &RL = CGM.getTypes().getCIRGenRecordLayout(Field->getParent());
const auto &Info = RL.getBitFieldInfo(Field);
llvm::APInt FieldValue = CI.getValue();

// Promote the size of FieldValue if necessary
// FIXME: This should never occur, but currently it can because initializer
// constants are cast to bool, and because clang is not enforcing bitfield
// width limits.
if (Info.Size > FieldValue.getBitWidth())
FieldValue = FieldValue.zext(Info.Size);

// Truncate the size of FieldValue to the bit field size.
if (Info.Size < FieldValue.getBitWidth())
FieldValue = FieldValue.trunc(Info.Size);

return Builder.addBits(FieldValue,
CGM.getASTContext().toBits(StartOffset) + FieldOffset,
AllowOverwrite);
}

static bool EmitDesignatedInitUpdater(ConstantEmitter &Emitter,
@@ -513,7 +670,16 @@ bool ConstStructBuilder::Build(InitListExpr *ILE, bool AllowOverwrite) {
if (Field->hasAttr<NoUniqueAddressAttr>())
AllowOverwrite = true;
} else {
llvm_unreachable("NYI");
// Otherwise we have a bitfield.
if (auto constInt = dyn_cast<mlir::cir::IntAttr>(EltInit)) {
if (!AppendBitField(Field, Layout.getFieldOffset(FieldNo), constInt,
AllowOverwrite))
return false;
} else {
// We are trying to initialize a bitfield with a non-trivial constant,
// this must require run-time code.
return false;
}
}
}

@@ -994,9 +1160,13 @@ buildArrayConstant(CIRGenModule &CGM, mlir::Type DesiredType,
ArrayBound));
}

// We have mixed types. Use a packed struct.
assert(0 && "NYE");
return {};
SmallVector<mlir::Attribute, 4> Eles;
Eles.reserve(Elements.size());
for (auto const &Element : Elements)
Eles.push_back(Element);

auto arrAttr = mlir::ArrayAttr::get(builder.getContext(), Eles);
return builder.getAnonConstStruct(arrAttr, false);
}

} // end anonymous namespace.
7 changes: 3 additions & 4 deletions clang/lib/CIR/CodeGen/CIRGenModule.cpp
@@ -653,11 +653,10 @@ CIRGenModule::getOrCreateCIRGlobal(StringRef MangledName, mlir::Type Ty,
// TODO(cir): LLVM codegen makes sure the result is of the correct type
// by issuing a address space cast.

// TODO(cir):
// (In LLVM codgen, if global is requested for a definition, we always need
// to create a new global, otherwise return a bitcast.)
// (If global is requested for a definition, we always need to create a new
// global, not just return a bitcast.)
if (!IsForDefinition)
assert(0 && "not implemented");
return Entry;
}

// TODO(cir): auto DAddrSpace = GetGlobalVarAddressSpace(D);
10 changes: 10 additions & 0 deletions clang/lib/CIR/Dialect/IR/CIRTypes.cpp
@@ -350,6 +350,16 @@ void StructType::complete(ArrayRef<Type> members, bool packed,
llvm_unreachable("failed to complete struct");
}

bool StructType::isLayoutIdentical(const StructType &other) {
if (getImpl() == other.getImpl())
return true;

if (getPacked() != other.getPacked())
return false;

return getMembers() == other.getMembers();
}

//===----------------------------------------------------------------------===//
// Data Layout information for types
//===----------------------------------------------------------------------===//