Skip to content

[flang][fir] Basic lowering fir.do_concurrent locality specs to fir.do_loop ... unordered #138512

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
May 9, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 64 additions & 1 deletion flang/lib/Optimizer/Transforms/SimplifyFIROperations.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,17 @@ mlir::LogicalResult BoxTotalElementsConversion::matchAndRewrite(

class DoConcurrentConversion
: public mlir::OpRewritePattern<fir::DoConcurrentOp> {
/// Looks up from the operation from and returns the LocalitySpecifierOp with
/// name symbolName
static fir::LocalitySpecifierOp
findLocalizer(mlir::Operation *from, mlir::SymbolRefAttr symbolName) {
fir::LocalitySpecifierOp localizer =
mlir::SymbolTable::lookupNearestSymbolFrom<fir::LocalitySpecifierOp>(
from, symbolName);
assert(localizer && "localizer not found in the symbol table");
return localizer;
}

public:
using mlir::OpRewritePattern<fir::DoConcurrentOp>::OpRewritePattern;

Expand All @@ -162,7 +173,59 @@ class DoConcurrentConversion
assert(loop.getRegion().hasOneBlock());
mlir::Block &loopBlock = loop.getRegion().getBlocks().front();

// Collect iteration variable(s) allocations do that we can move them
// Handle localization
if (!loop.getLocalVars().empty()) {
mlir::OpBuilder::InsertionGuard guard(rewriter);
rewriter.setInsertionPointToStart(&loop.getRegion().front());

std::optional<mlir::ArrayAttr> localSyms = loop.getLocalSyms();

for (auto [localVar, localArg, localizerSym] : llvm::zip_equal(
loop.getLocalVars(), loop.getRegionLocalArgs(), *localSyms)) {
mlir::SymbolRefAttr localizerName =
llvm::cast<mlir::SymbolRefAttr>(localizerSym);
fir::LocalitySpecifierOp localizer = findLocalizer(loop, localizerName);

if (!localizer.getInitRegion().empty() ||
!localizer.getDeallocRegion().empty())
TODO(localizer.getLoc(), "localizers with `init` and `dealloc` "
"regions are not handled yet.");

// TODO Should this be a heap allocation instead? For now, we allocate
// on the stack for each loop iteration.
mlir::Value localAlloc =
rewriter.create<fir::AllocaOp>(loop.getLoc(), localizer.getType());
Comment on lines +196 to +197
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if this should be done on the heap because it will happen for every loop iteration. Does the spec say that it has to be on the stack?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nothing in the spec says it has to be on the stack AFAICT. However, this is following the behavior of handleLocalitySpecs in Bridge.cpp (without delayed localization). Added a todo, let me know if you disagree with looking into this later.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please could you use TODO(loc, "message") so that the compiler crashes (in a controlled way) instead of producing incorrect code if any of these do sneak through.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you meant for the init and dealloc todo right?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ahh yes, sorry for commenting in a confusing place

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

All good. Done.


if (localizer.getLocalitySpecifierType() ==
fir::LocalitySpecifierType::LocalInit) {
// It is reasonable to make this assumption since, at this stage,
// control-flow ops are not converted yet. Therefore, things like `if`
// conditions will still be represented by their encapsulating `fir`
// dialect ops.
assert(localizer.getCopyRegion().hasOneBlock() &&
"Expected localizer to have a single block.");
mlir::Block *beforeLocalInit = rewriter.getInsertionBlock();
mlir::Block *afterLocalInit = rewriter.splitBlock(
rewriter.getInsertionBlock(), rewriter.getInsertionPoint());
rewriter.cloneRegionBefore(localizer.getCopyRegion(), afterLocalInit);
mlir::Block *copyRegionBody = beforeLocalInit->getNextNode();

rewriter.eraseOp(copyRegionBody->getTerminator());
rewriter.mergeBlocks(afterLocalInit, copyRegionBody);
rewriter.mergeBlocks(copyRegionBody, beforeLocalInit,
{localVar, localArg});
}

rewriter.replaceAllUsesWith(localArg, localAlloc);
}

loop.getRegion().front().eraseArguments(loop.getNumInductionVars(),
loop.getNumLocalOperands());
loop.getLocalVarsMutable().clear();
loop.setLocalSymsAttr(nullptr);
}

// Collect iteration variable(s) allocations so that we can move them
// outside the `fir.do_concurrent` wrapper.
llvm::SmallVector<mlir::Operation *> opsToMove;
for (mlir::Operation &op : llvm::drop_end(wrapperBlock))
Expand Down
61 changes: 61 additions & 0 deletions flang/test/Transforms/do_concurrent-to-do_loop-unodered.fir
Original file line number Diff line number Diff line change
Expand Up @@ -121,3 +121,64 @@ func.func @dc_2d_reduction(%i_lb: index, %i_ub: index, %i_st: index,
// CHECK: }
// CHECK: return
// CHECK: }

// -----

fir.local {type = local} @local_localizer : i32

fir.local {type = local_init} @local_init_localizer : i32 copy {
^bb0(%arg0: !fir.ref<i32>, %arg1: !fir.ref<i32>):
%0 = fir.load %arg0 : !fir.ref<i32>
fir.store %0 to %arg1 : !fir.ref<i32>
fir.yield(%arg1 : !fir.ref<i32>)
}

func.func @do_concurrent_locality_specs() {
%3 = fir.alloca i32 {bindc_name = "local_init_var", uniq_name = "_QFdo_concurrentElocal_init_var"}
%4:2 = hlfir.declare %3 {uniq_name = "_QFdo_concurrentElocal_init_var"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
%5 = fir.alloca i32 {bindc_name = "local_var", uniq_name = "_QFdo_concurrentElocal_var"}
%6:2 = hlfir.declare %5 {uniq_name = "_QFdo_concurrentElocal_var"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
%c1 = arith.constant 1 : index
%c10 = arith.constant 1 : index
fir.do_concurrent {
%9 = fir.alloca i32 {bindc_name = "i"}
%10:2 = hlfir.declare %9 {uniq_name = "_QFdo_concurrentEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
fir.do_concurrent.loop (%arg0) = (%c1) to (%c10) step (%c1) local(@local_localizer %6#0 -> %arg1, @local_init_localizer %4#0 -> %arg2 : !fir.ref<i32>, !fir.ref<i32>) {
%11 = fir.convert %arg0 : (index) -> i32
fir.store %11 to %10#0 : !fir.ref<i32>
%13:2 = hlfir.declare %arg1 {uniq_name = "_QFdo_concurrentElocal_var"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
%15:2 = hlfir.declare %arg2 {uniq_name = "_QFdo_concurrentElocal_init_var"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
%17 = fir.load %10#0 : !fir.ref<i32>
%c5_i32 = arith.constant 5 : i32
%18 = arith.cmpi slt, %17, %c5_i32 : i32
fir.if %18 {
%c42_i32 = arith.constant 42 : i32
hlfir.assign %c42_i32 to %13#0 : i32, !fir.ref<i32>
} else {
%c84_i32 = arith.constant 84 : i32
hlfir.assign %c84_i32 to %15#0 : i32, !fir.ref<i32>
}
}
}
return
}

// CHECK-LABEL: func.func @do_concurrent_locality_specs() {
// CHECK: %[[LOC_INIT_DECL:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "{{.*}}Elocal_init_var"}
// CHECK: fir.do_loop %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} unordered {
// Verify localization of the `local` var.
// CHECK: %[[PRIV_LOC_ALLOC:.*]] = fir.alloca i32

// Verify localization of the `local_init` var.
// CHECK: %[[PRIV_LOC_INIT_ALLOC:.*]] = fir.alloca i32
// CHECK: %[[LOC_INIT_VAL:.*]] = fir.load %[[LOC_INIT_DECL]]#0 : !fir.ref<i32>
// CHECK: fir.store %[[LOC_INIT_VAL]] to %[[PRIV_LOC_INIT_ALLOC]] : !fir.ref<i32>

// CHECK: %[[VAL_15:.*]]:2 = hlfir.declare %[[PRIV_LOC_ALLOC]]
// CHECK: %[[VAL_16:.*]]:2 = hlfir.declare %[[PRIV_LOC_INIT_ALLOC]]

// CHECK: hlfir.assign %{{.*}} to %[[VAL_15]]#0 : i32, !fir.ref<i32>
// CHECK: hlfir.assign %{{.*}} to %[[VAL_16]]#0 : i32, !fir.ref<i32>
// CHECK: }
// CHECK: return
// CHECK: }