From 26a1cd1313ad8ae0d3b4582226a871b0e8b90fbc Mon Sep 17 00:00:00 2001 From: Sirui Mu Date: Tue, 9 Jan 2024 08:14:55 +0800 Subject: [PATCH] [CIR][CIRGen] Support array def after decl with unknown bound (#375) Arrays can be first declared without a known bound, and then defined with a known bound. For example: ```cpp extern int data[]; int test() { return data[1]; } int data[3] {1, 2, 3}; ``` Currently `clangir` crashes when generating CIR for this case. This is due to the type of the `data` definition being different from its declaration. This patch adds support for such a case. --- clang/lib/CIR/CodeGen/CIRGenModule.cpp | 49 ++++++++++++++++--- clang/lib/CIR/CodeGen/CIRGenModule.h | 11 ++++- .../test/CIR/CodeGen/array-unknown-bound.cpp | 14 ++++++ 3 files changed, 67 insertions(+), 7 deletions(-) create mode 100644 clang/test/CIR/CodeGen/array-unknown-bound.cpp diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp index 2c1328df564a..0013adb582b1 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp @@ -471,7 +471,8 @@ mlir::Value CIRGenModule::getGlobalValue(const Decl *D) { mlir::cir::GlobalOp CIRGenModule::createGlobalOp(CIRGenModule &CGM, mlir::Location loc, StringRef name, mlir::Type t, - bool isCst) { + bool isCst, + mlir::Operation *insertPoint) { mlir::cir::GlobalOp g; auto &builder = CGM.getBuilder(); { @@ -486,8 +487,12 @@ mlir::cir::GlobalOp CIRGenModule::createGlobalOp(CIRGenModule &CGM, builder.setInsertionPoint(curCGF->CurFn); g = builder.create<mlir::cir::GlobalOp>(loc, name, t, isCst); - if (!curCGF) - CGM.getModule().push_back(g); + if (!curCGF) { + if (insertPoint) + CGM.getModule().insert(insertPoint, g); + else + CGM.getModule().push_back(g); + } // Default to private until we can judge based on the initializer, // since MLIR doesn't allow public declarations. 
@@ -501,6 +506,35 @@ void CIRGenModule::setCommonAttributes(GlobalDecl GD, mlir::Operation *GV) { assert(!UnimplementedFeature::setCommonAttributes()); } +void CIRGenModule::replaceGlobal(mlir::cir::GlobalOp Old, + mlir::cir::GlobalOp New) { + assert(Old.getSymName() == New.getSymName() && "symbol names must match"); + + // If the types do not match, update all references to Old to the new type. + auto OldTy = Old.getSymType(); + auto NewTy = New.getSymType(); + if (OldTy != NewTy) { + auto OldSymUses = Old.getSymbolUses(theModule.getOperation()); + if (OldSymUses.has_value()) { + for (auto Use : *OldSymUses) { + auto *UserOp = Use.getUser(); + assert((isa<mlir::cir::GetGlobalOp>(UserOp) || + isa<mlir::cir::GlobalOp>(UserOp)) && + "GlobalOp symbol user is neither a GetGlobalOp nor a GlobalOp"); + + if (auto GGO = dyn_cast<mlir::cir::GetGlobalOp>(Use.getUser())) { + auto UseOpResultValue = GGO.getAddr(); + UseOpResultValue.setType( + mlir::cir::PointerType::get(builder.getContext(), NewTy)); + } + } + } + } + + // Remove old global from the module. + Old.erase(); +} + /// If the specified mangled name is not in the module, /// create and return an mlir GlobalOp with the specified type (TODO(cir): /// address space). @@ -592,11 +626,14 @@ CIRGenModule::getOrCreateCIRGlobal(StringRef MangledName, mlir::Type Ty, // mlir::SymbolTable::Visibility::Public is the default, no need to explicitly // mark it as such. auto GV = CIRGenModule::createGlobalOp(*this, loc, MangledName, Ty, - /*isConstant=*/false); + /*isConstant=*/false, + /*insertPoint=*/Entry.getOperation()); // If we already created a global with the same mangled name (but different - // type) before, take its name and remove it from its parent. - assert(!Entry && "not implemented"); + // type) before, replace it with the new global. + if (Entry) { + replaceGlobal(Entry, GV); + } // This is the first use or definition of a mangled name. 
If there is a // deferred decl with this name, remember that we need to emit it at the end diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.h b/clang/lib/CIR/CodeGen/CIRGenModule.h index aeb1313b38c4..a0b30e7464ab 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.h +++ b/clang/lib/CIR/CodeGen/CIRGenModule.h @@ -220,7 +220,8 @@ class CIRGenModule : public CIRGenTypeCache { static mlir::cir::GlobalOp createGlobalOp(CIRGenModule &CGM, mlir::Location loc, StringRef name, - mlir::Type t, bool isCst = false); + mlir::Type t, bool isCst = false, + mlir::Operation *insertPoint = nullptr); /// Return the mlir::Value for the address of the given global variable. /// If Ty is non-null and if the global doesn't exist, then it will be created @@ -445,6 +446,14 @@ class CIRGenModule : public CIRGenTypeCache { void setGVProperties(mlir::Operation *Op, const NamedDecl *D) const; void setGVPropertiesAux(mlir::Operation *Op, const NamedDecl *D) const; + /// Replace the present global `Old` with the given global `New`. Their symbol + /// names must match; their types can be different. Usages of the old global + /// will be automatically updated if their types mismatch. + /// + /// This function will erase the old global. This function will NOT insert the + /// new global into the module. + void replaceGlobal(mlir::cir::GlobalOp Old, mlir::cir::GlobalOp New); + /// Determine whether the definition must be emitted; if this returns \c /// false, the definition can be emitted lazily if it's used. 
bool MustBeEmitted(const clang::ValueDecl *D); diff --git a/clang/test/CIR/CodeGen/array-unknown-bound.cpp b/clang/test/CIR/CodeGen/array-unknown-bound.cpp new file mode 100644 index 000000000000..09f75ca27f27 --- /dev/null +++ b/clang/test/CIR/CodeGen/array-unknown-bound.cpp @@ -0,0 +1,14 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-cir %s -o - | FileCheck %s + +extern int table[]; +// CHECK: cir.global external @table = #cir.const_array<[#cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i]> : !cir.array<!s32i x 3> + +int *table_ptr = table; +// CHECK: cir.global external @table_ptr = #cir.global_view<@table> : !cir.ptr<!s32i> + +int test() { return table[1]; } +// CHECK: cir.func @_Z4testv() -> !s32i extra( {inline = #cir.inline<no>, optnone = #cir.optnone} ) { +// CHECK-NEXT: %0 = cir.alloca !s32i, cir.ptr <!s32i>, ["__retval"] {alignment = 4 : i64} +// CHECK-NEXT: %1 = cir.get_global @table : cir.ptr <!cir.array<!s32i x 3>> + +int table[3] {1, 2, 3};