-
Notifications
You must be signed in to change notification settings - Fork 13.3k
[OpenMP 6.0] Codegen for reduction over private variables with reduction clause #134709
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4899,6 +4899,151 @@ void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF, | |
} | ||
} | ||
|
||
/// Emits the cross-thread combine step for a reduction whose list items are
/// private variables.
///
/// As written, the function:
///   1. creates one module-level global named ".omp.reduction." + SharedName,
///   2. has the thread whose ID compares equal to 0 store the initial value
///      into that global,
///   3. atomically folds each thread's LHS value into the global,
///   4. loads the global once and stores that value back through every entry
///      of LHSExprs.
/// A call to __kmpc_barrier is emitted before step 2, between steps 2 and 3,
/// between steps 3 and 4, and after step 4.
///
/// \param CGF          Function-level codegen state used to emit all IR here.
/// \param Loc          Source location forwarded to getThreadID,
///                     emitUpdateLocation, the LHS load and the atomic update.
/// \param Privates     Only Privates[0] is inspected (its type and, if it is
///                     a DeclRefExpr to a VarDecl, its initializer); the size
///                     is used for validation and as the final loop bound.
/// \param LHSExprs     Emitted as lvalues; each is read as the thread's
///                     partial value and later overwritten with the result.
/// \param RHSExprs     Not referenced anywhere in this function body.
/// \param ReductionOps Entries that are not a BinaryOperator with opcode
///                     BO_Assign are skipped.
void CGOpenMPRuntime::emitPrivateReduction( | ||
CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates, | ||
ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, | ||
ArrayRef<const Expr *> ReductionOps) { | ||
|
||
// Nothing to emit when any of the three lists that are read below is empty.
if (LHSExprs.empty() || Privates.empty() || ReductionOps.empty()) | ||
return; | ||
|
||
// The three lists are indexed in lockstep later, so silently bail out if
// their lengths disagree.
if (LHSExprs.size() != Privates.size() || | ||
LHSExprs.size() != ReductionOps.size()) | ||
return; | ||
|
||
// The shared variable's type is taken from the first private only.
QualType PrivateType = Privates[0]->getType(); | ||
llvm::Type *LLVMType = CGF.ConvertTypeForMem(PrivateType); | ||
|
||
// NOTE(review): MainBO is assigned here but never read in the rest of this
// function; it looks like leftover code.
BinaryOperatorKind MainBO = BO_Comma; | ||
if (const auto *BinOp = dyn_cast<BinaryOperator>(ReductionOps[0])) { | ||
if (const auto *RHSExpr = BinOp->getRHS()) { | ||
if (const auto *BORHS = | ||
dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) { | ||
MainBO = BORHS->getOpcode(); | ||
} | ||
} | ||
} | ||
|
||
// Default initial value is the all-zero constant of the memory type.
llvm::Constant *InitVal = llvm::Constant::getNullValue(LLVMType); | ||
const Expr *Private = Privates[0]; | ||
|
||
// If Privates[0] names a variable whose initializer is a constant that
// evaluates to an integer, use that integer as the initial value instead.
// Non-integer results (e.g. floating point) keep the zero default.
if (const auto *DRE = dyn_cast<DeclRefExpr>(Private)) { | ||
if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) { | ||
if (const Expr *Init = VD->getInit()) { | ||
if (Init->isConstantInitializer(CGF.getContext(), false)) { | ||
Expr::EvalResult Result; | ||
if (Init->EvaluateAsRValue(Result, CGF.getContext())) { | ||
APValue &InitValue = Result.Val; | ||
if (InitValue.isInt()) { | ||
InitVal = llvm::ConstantInt::get(LLVMType, InitValue.getInt()); | ||
} | ||
} | ||
} | ||
} | ||
} | ||
} | ||
|
||
// NOTE(review): the global is created with CommonLinkage, yet InitVal may be
// a non-zero integer at this point. The LLVM LangRef states that common
// symbols must have a zero initializer - confirm this passes the verifier.
// Create an internal shared variable | ||
std::string SharedName = getName({"internal_private_var"}); | ||
llvm::GlobalVariable *SharedVar = new llvm::GlobalVariable( | ||
CGM.getModule(), LLVMType, false, llvm::GlobalValue::CommonLinkage, | ||
InitVal, ".omp.reduction." + SharedName, nullptr, | ||
llvm::GlobalVariable::NotThreadLocal); | ||
|
||
// Presumably getTypeAlign() reports bits and "/ 8" converts to bytes; the
// Address below obtains its alignment via getTypeAlignInChars() instead.
SharedVar->setAlignment( | ||
llvm::MaybeAlign(CGF.getContext().getTypeAlign(PrivateType) / 8)); | ||
|
||
Address SharedResult(SharedVar, SharedVar->getValueType(), | ||
CGF.getContext().getTypeAlignInChars(PrivateType)); | ||
|
||
// The same {location, thread id} argument pair is reused for every
// __kmpc_barrier call emitted in this function.
llvm::Value *ThreadId = getThreadID(CGF, Loc); | ||
llvm::Value *BarrierLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE); | ||
llvm::Value *BarrierArgs[] = {BarrierLoc, ThreadId}; | ||
|
||
// Barrier emitted before the shared variable is (re)initialized.
CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( | ||
CGM.getModule(), OMPRTL___kmpc_barrier), | ||
BarrierArgs); | ||
|
||
llvm::BasicBlock *InitBB = CGF.createBasicBlock("init"); | ||
llvm::BasicBlock *InitEndBB = CGF.createBasicBlock("init.end"); | ||
|
||
// Only the thread whose ID equals 0 takes the "init" branch.
// NOTE(review): the value is named IsWorker although it selects thread 0;
// also confirm whether getThreadID() yields a team-local or a global ID,
// since exactly one thread of the team must take this branch.
llvm::Value *IsWorker = CGF.Builder.CreateICmpEQ( | ||
ThreadId, llvm::ConstantInt::get(ThreadId->getType(), 0)); | ||
CGF.Builder.CreateCondBr(IsWorker, InitBB, InitEndBB); | ||
|
||
// init: store the initial value into the shared variable, then fall into
// init.end where all threads continue.
CGF.EmitBlock(InitBB); | ||
CGF.Builder.CreateStore(InitVal, SharedResult); | ||
CGF.Builder.CreateBr(InitEndBB); | ||
|
||
CGF.EmitBlock(InitEndBB); | ||
|
||
// Barrier emitted between the initializing store and the atomic updates.
CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( | ||
CGM.getModule(), OMPRTL___kmpc_barrier), | ||
BarrierArgs); | ||
|
||
// Fold every reduction item into the single shared variable.
// NOTE(review): the bounds check right below can never fire, because I is
// already limited to min(ReductionOps.size(), LHSExprs.size()) and the sizes
// were verified equal above.
// NOTE(review): all items update the same SharedResult, whose type comes
// from Privates[0]; confirm the intended behavior for multiple list items.
for (unsigned I : | ||
llvm::seq<unsigned>(std::min(ReductionOps.size(), LHSExprs.size()))) { | ||
if (I >= LHSExprs.size()) { | ||
break; | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It can be removed There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done !! |
||
|
||
// Only ops of the form "<lhs> = <rhs>" are handled; anything else is
// skipped without emitting code for that item.
const auto *BinOp = dyn_cast<BinaryOperator>(ReductionOps[I]); | ||
if (!BinOp || BinOp->getOpcode() != BO_Assign) | ||
continue; | ||
|
||
const Expr *RHSExpr = BinOp->getRHS(); | ||
if (!RHSExpr) | ||
continue; | ||
|
||
// Take the opcode of the assignment's right-hand side when it is itself a
// BinaryOperator (after stripping parens/implicit casts); otherwise BO
// stays BO_Comma.
BinaryOperatorKind BO = BO_Comma; | ||
if (const auto *BORHS = | ||
dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) { | ||
BO = BORHS->getOpcode(); | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why do you need to look through expressions? You should emit them as is There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is required , We need to look through the RHS to get the actual reduction operator. For instance:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What if this is an operator (function call), not an opcode? This may happen for classes with the user-defined reductions There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @alexey-bataev Fixing by adding CXXOperatorCallExpr to handle user-defined reduction operators. |
||
|
||
// SharedLV is the accumulation target; PrivateRV is this thread's value
// loaded from LHSExprs[I].
LValue SharedLV = CGF.MakeAddrLValue(SharedResult, PrivateType); | ||
LValue LHSLV = CGF.EmitLValue(LHSExprs[I]); | ||
RValue PrivateRV = CGF.EmitLoadOfLValue(LHSLV, Loc); | ||
// Callback handed to the atomic-update helper: given the old shared value
// it produces the new one.
// NOTE(review): the BO_Mul branch emits an integer CreateMul on the scalar
// values - confirm this is never reached for floating-point types.
// NOTE(review): in the else branch OVE is a freshly built local node, so
// it is unclear from here that BinOp->getRHS() refers to it - verify that
// the mapping to OldVal actually affects the emitted expression.
auto UpdateOp = [&](RValue OldVal) { | ||
if (BO == BO_Mul) { | ||
llvm::Value *OldScalar = OldVal.getScalarVal(); | ||
llvm::Value *PrivateScalar = PrivateRV.getScalarVal(); | ||
llvm::Value *Result = CGF.Builder.CreateMul(OldScalar, PrivateScalar); | ||
return RValue::get(Result); | ||
} else { | ||
OpaqueValueExpr OVE(BinOp->getLHS()->getExprLoc(), | ||
BinOp->getLHS()->getType(), | ||
ExprValueKind::VK_PRValue); | ||
CodeGenFunction::OpaqueValueMapping OldValMapping(CGF, &OVE, OldVal); | ||
return CGF.EmitAnyExpr(BinOp->getRHS()); | ||
} | ||
}; | ||
|
||
// Atomically apply BO / UpdateOp to the shared variable with sequentially
// consistent ordering; the helper's return value is intentionally ignored.
(void)CGF.EmitOMPAtomicSimpleUpdateExpr( | ||
SharedLV, PrivateRV, BO, true, | ||
llvm::AtomicOrdering::SequentiallyConsistent, Loc, UpdateOp); | ||
} | ||
|
||
// Final barrier | ||
CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( | ||
CGM.getModule(), OMPRTL___kmpc_barrier), | ||
BarrierArgs); | ||
|
||
// The shared variable is loaded once; that single value is what gets written
// back below.
// Broadcast final result | ||
llvm::Value *FinalResult = CGF.Builder.CreateLoad(SharedResult); | ||
|
||
// The loop is bounded by Privates.size() but indexes LHSExprs; the sizes were
// verified equal at the top. Every LHS receives the same FinalResult value.
// Update private variables with final result | ||
for (unsigned I : llvm::seq<unsigned>(Privates.size())) { | ||
LValue LHSLV = CGF.EmitLValue(LHSExprs[I]); | ||
CGF.Builder.CreateStore(FinalResult, LHSLV.getAddress()); | ||
} | ||
|
||
// Final synchronization | ||
CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( | ||
CGM.getModule(), OMPRTL___kmpc_barrier), | ||
BarrierArgs); | ||
} | ||
|
||
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, | ||
ArrayRef<const Expr *> Privates, | ||
ArrayRef<const Expr *> LHSExprs, | ||
|
@@ -5201,6 +5346,8 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, | |
|
||
CGF.EmitBranch(DefaultBB); | ||
CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); | ||
if (Options.IsPrivateVarReduction) | ||
emitPrivateReduction(CGF, Loc, Privates, LHSExprs, RHSExprs, ReductionOps); | ||
} | ||
|
||
/// Generates unique name for artificial threadprivate variables. | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is it actually safe to use the shared var between threads?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should be good: it uses atomic operations with SequentiallyConsistent ordering, and it also has synchronization barriers at the critical points.