Skip to content

[OpenMP 6.0 ]Codegen for Reduction over private variables with reduction clause #134709

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
147 changes: 147 additions & 0 deletions clang/lib/CodeGen/CGOpenMPRuntime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4899,6 +4899,151 @@ void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
}
}

void CGOpenMPRuntime::emitPrivateReduction(
CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
ArrayRef<const Expr *> ReductionOps) {

if (LHSExprs.empty() || Privates.empty() || ReductionOps.empty())
return;

if (LHSExprs.size() != Privates.size() ||
LHSExprs.size() != ReductionOps.size())
return;

QualType PrivateType = Privates[0]->getType();
llvm::Type *LLVMType = CGF.ConvertTypeForMem(PrivateType);

BinaryOperatorKind MainBO = BO_Comma;
if (const auto *BinOp = dyn_cast<BinaryOperator>(ReductionOps[0])) {
if (const auto *RHSExpr = BinOp->getRHS()) {
if (const auto *BORHS =
dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
MainBO = BORHS->getOpcode();
}
}
}

llvm::Constant *InitVal = llvm::Constant::getNullValue(LLVMType);
const Expr *Private = Privates[0];

if (const auto *DRE = dyn_cast<DeclRefExpr>(Private)) {
if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) {
if (const Expr *Init = VD->getInit()) {
if (Init->isConstantInitializer(CGF.getContext(), false)) {
Expr::EvalResult Result;
if (Init->EvaluateAsRValue(Result, CGF.getContext())) {
APValue &InitValue = Result.Val;
if (InitValue.isInt()) {
InitVal = llvm::ConstantInt::get(LLVMType, InitValue.getInt());
}
}
}
}
}
}

// Create an internal shared variable
std::string SharedName = getName({"internal_private_var"});
llvm::GlobalVariable *SharedVar = new llvm::GlobalVariable(
CGM.getModule(), LLVMType, false, llvm::GlobalValue::CommonLinkage,
InitVal, ".omp.reduction." + SharedName, nullptr,
llvm::GlobalVariable::NotThreadLocal);

SharedVar->setAlignment(
llvm::MaybeAlign(CGF.getContext().getTypeAlign(PrivateType) / 8));
Comment on lines +4953 to +4954
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it actually safe to use the shared var between threads?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should be good, using atomic operations with SequentiallyConsistent ordering also have synchronization barriers at critical points.


Address SharedResult(SharedVar, SharedVar->getValueType(),
CGF.getContext().getTypeAlignInChars(PrivateType));

llvm::Value *ThreadId = getThreadID(CGF, Loc);
llvm::Value *BarrierLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
llvm::Value *BarrierArgs[] = {BarrierLoc, ThreadId};

CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
CGM.getModule(), OMPRTL___kmpc_barrier),
BarrierArgs);

llvm::BasicBlock *InitBB = CGF.createBasicBlock("init");
llvm::BasicBlock *InitEndBB = CGF.createBasicBlock("init.end");

llvm::Value *IsWorker = CGF.Builder.CreateICmpEQ(
ThreadId, llvm::ConstantInt::get(ThreadId->getType(), 0));
CGF.Builder.CreateCondBr(IsWorker, InitBB, InitEndBB);

CGF.EmitBlock(InitBB);
CGF.Builder.CreateStore(InitVal, SharedResult);
CGF.Builder.CreateBr(InitEndBB);

CGF.EmitBlock(InitEndBB);

CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
CGM.getModule(), OMPRTL___kmpc_barrier),
BarrierArgs);

for (unsigned I :
llvm::seq<unsigned>(std::min(ReductionOps.size(), LHSExprs.size()))) {
if (I >= LHSExprs.size()) {
break;
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It can be removed

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done !!


const auto *BinOp = dyn_cast<BinaryOperator>(ReductionOps[I]);
if (!BinOp || BinOp->getOpcode() != BO_Assign)
continue;

const Expr *RHSExpr = BinOp->getRHS();
if (!RHSExpr)
continue;

BinaryOperatorKind BO = BO_Comma;
if (const auto *BORHS =
dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
BO = BORHS->getOpcode();
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do you need to look through expressions? You should emit them as is

Copy link
Contributor Author

@chandraghale chandraghale Apr 9, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is required , We need to look through the RHS to get the actual reduction operator. For instance:

`-BinaryOperator 0xce76050 <line:5:5, col:17> 'int' lvalue '='    // Assignment operator
  |-DeclRefExpr 0xce75fa0 <col:5> 'int' lvalue Var 0xce75678 'sum'  // LHS
  `-BinaryOperator 0xce76030 <col:11, col:17> 'int' '+'           // RHS contains actual reduction op
    |-ImplicitCastExpr 0xce76000 <col:11> 'int' <LValueToRValue>
    | `-DeclRefExpr 0xce75fc0 <col:11> 'int' lvalue Var 0xce75678 'sum'
    `-ImplicitCastExpr 0xce76018 <col:17> 'int' <LValueToRValue>
      `-DeclRefExpr 0xce75fe0 <col:17> 'int' lvalue Var 0xce75e00 'i
```'

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What if this is an operator (function call), not an opcode? This may happen for classes with the user-defined reductions

Copy link
Contributor Author

@chandraghale chandraghale Apr 10, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@alexey-bataev Fixing by adding CXXOperatorCallExpr to handle user-defined reduction operators.


LValue SharedLV = CGF.MakeAddrLValue(SharedResult, PrivateType);
LValue LHSLV = CGF.EmitLValue(LHSExprs[I]);
RValue PrivateRV = CGF.EmitLoadOfLValue(LHSLV, Loc);
auto UpdateOp = [&](RValue OldVal) {
if (BO == BO_Mul) {
llvm::Value *OldScalar = OldVal.getScalarVal();
llvm::Value *PrivateScalar = PrivateRV.getScalarVal();
llvm::Value *Result = CGF.Builder.CreateMul(OldScalar, PrivateScalar);
return RValue::get(Result);
} else {
OpaqueValueExpr OVE(BinOp->getLHS()->getExprLoc(),
BinOp->getLHS()->getType(),
ExprValueKind::VK_PRValue);
CodeGenFunction::OpaqueValueMapping OldValMapping(CGF, &OVE, OldVal);
return CGF.EmitAnyExpr(BinOp->getRHS());
}
};

(void)CGF.EmitOMPAtomicSimpleUpdateExpr(
SharedLV, PrivateRV, BO, true,
llvm::AtomicOrdering::SequentiallyConsistent, Loc, UpdateOp);
}

// Final barrier
CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
CGM.getModule(), OMPRTL___kmpc_barrier),
BarrierArgs);

// Broadcast final result
llvm::Value *FinalResult = CGF.Builder.CreateLoad(SharedResult);

// Update private variables with final result
for (unsigned I : llvm::seq<unsigned>(Privates.size())) {
LValue LHSLV = CGF.EmitLValue(LHSExprs[I]);
CGF.Builder.CreateStore(FinalResult, LHSLV.getAddress());
}

// Final synchronization
CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
CGM.getModule(), OMPRTL___kmpc_barrier),
BarrierArgs);
}

void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
ArrayRef<const Expr *> Privates,
ArrayRef<const Expr *> LHSExprs,
Expand Down Expand Up @@ -5201,6 +5346,8 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,

CGF.EmitBranch(DefaultBB);
CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
if (Options.IsPrivateVarReduction)
emitPrivateReduction(CGF, Loc, Privates, LHSExprs, RHSExprs, ReductionOps);
}

/// Generates unique name for artificial threadprivate variables.
Expand Down
14 changes: 14 additions & 0 deletions clang/lib/CodeGen/CGOpenMPRuntime.h
Original file line number Diff line number Diff line change
Expand Up @@ -1201,8 +1201,22 @@ class CGOpenMPRuntime {
struct ReductionOptionsTy {
bool WithNowait;
bool SimpleReduction;
bool IsPrivateVarReduction;
OpenMPDirectiveKind ReductionKind;
};

/// Emits code for private variable reduction
/// \param Privates List of private copies for original reduction arguments.
/// \param LHSExprs List of LHS in \a ReductionOps reduction operations.
/// \param RHSExprs List of RHS in \a ReductionOps reduction operations.
/// \param ReductionOps List of reduction operations in form 'LHS binop RHS'
/// or 'operator binop(LHS, RHS)'.
void emitPrivateReduction(CodeGenFunction &CGF, SourceLocation Loc,
ArrayRef<const Expr *> Privates,
ArrayRef<const Expr *> LHSExprs,
ArrayRef<const Expr *> RHSExprs,
ArrayRef<const Expr *> ReductionOps);

/// Emit a code for reduction clause. Next code should be emitted for
/// reduction:
/// \code
Expand Down
12 changes: 9 additions & 3 deletions clang/lib/CodeGen/CGStmtOpenMP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1470,6 +1470,7 @@ void CodeGenFunction::EmitOMPReductionClauseFinal(
llvm::SmallVector<const Expr *, 8> LHSExprs;
llvm::SmallVector<const Expr *, 8> RHSExprs;
llvm::SmallVector<const Expr *, 8> ReductionOps;
llvm::SmallVector<bool, 8> IsPrivate;
bool HasAtLeastOneReduction = false;
bool IsReductionWithTaskMod = false;
for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
Expand All @@ -1480,6 +1481,8 @@ void CodeGenFunction::EmitOMPReductionClauseFinal(
Privates.append(C->privates().begin(), C->privates().end());
LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
IsPrivate.append(C->private_var_reduction_flags().begin(),
C->private_var_reduction_flags().end());
ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
IsReductionWithTaskMod =
IsReductionWithTaskMod || C->getModifier() == OMPC_REDUCTION_task;
Expand All @@ -1499,9 +1502,11 @@ void CodeGenFunction::EmitOMPReductionClauseFinal(
bool SimpleReduction = ReductionKind == OMPD_simd;
// Emit nowait reduction if nowait clause is present or directive is a
// parallel directive (it always has implicit barrier).
bool IsPrivateVarReduction =
llvm::any_of(IsPrivate, [](bool IsPriv) { return IsPriv; });
CGM.getOpenMPRuntime().emitReduction(
*this, D.getEndLoc(), Privates, LHSExprs, RHSExprs, ReductionOps,
{WithNowait, SimpleReduction, ReductionKind});
{WithNowait, SimpleReduction, IsPrivateVarReduction, ReductionKind});
}
}

Expand Down Expand Up @@ -3943,7 +3948,8 @@ static void emitScanBasedDirective(
PrivScope.Privatize();
CGF.CGM.getOpenMPRuntime().emitReduction(
CGF, S.getEndLoc(), Privates, LHSs, RHSs, ReductionOps,
{/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_unknown});
{/*WithNowait=*/true, /*SimpleReduction=*/true,
/*IsPrivateVarReduction */ false, OMPD_unknown});
}
llvm::Value *NextIVal =
CGF.Builder.CreateNUWSub(IVal, llvm::ConstantInt::get(CGF.SizeTy, 1));
Expand Down Expand Up @@ -5747,7 +5753,7 @@ void CodeGenFunction::EmitOMPScanDirective(const OMPScanDirective &S) {
}
CGM.getOpenMPRuntime().emitReduction(
*this, ParentDir.getEndLoc(), Privates, LHSs, RHSs, ReductionOps,
{/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_simd});
{/*WithNowait=*/true, /*SimpleReduction=*/true, false, OMPD_simd});
for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
const Expr *PrivateExpr = Privates[I];
LValue DestLVal;
Expand Down
Loading