Skip to content

Commit 2cc8734

Browse files
authored
[flang][fir] Basic lowering fir.do_concurrent locality specs to fir.do_loop ... unordered (#138512)
Extends lowering `fir.do_concurrent` to `fir.do_loop ... unordered` by adding support for locality specifiers. In particular, for `local` specifiers, a `fir.alloca` op is created using the localizer type. For `local_init` specifiers, the `copy` region is additionally inlined in the `do concurrent` loop's body. PR stack: - #137928 - #138505 - #138506 - #138512 (this PR) - #138534 - #138816
1 parent fb7d114 commit 2cc8734

File tree

2 files changed

+125
-1
lines changed

2 files changed

+125
-1
lines changed

flang/lib/Optimizer/Transforms/SimplifyFIROperations.cpp

Lines changed: 64 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,17 @@ mlir::LogicalResult BoxTotalElementsConversion::matchAndRewrite(
149149

150150
class DoConcurrentConversion
151151
: public mlir::OpRewritePattern<fir::DoConcurrentOp> {
152+
/// Resolves `symbolName` to its fir::LocalitySpecifierOp via a nearest-symbol
153+
/// lookup that starts at `from`; asserts that the localizer was found.
154+
static fir::LocalitySpecifierOp
155+
findLocalizer(mlir::Operation *from, mlir::SymbolRefAttr symbolName) {
156+
fir::LocalitySpecifierOp localizer =
157+
mlir::SymbolTable::lookupNearestSymbolFrom<fir::LocalitySpecifierOp>(
158+
from, symbolName);
159+
assert(localizer && "localizer not found in the symbol table");
160+
return localizer;
161+
}
162+
152163
public:
153164
using mlir::OpRewritePattern<fir::DoConcurrentOp>::OpRewritePattern;
154165

@@ -162,7 +173,59 @@ class DoConcurrentConversion
162173
assert(loop.getRegion().hasOneBlock());
163174
mlir::Block &loopBlock = loop.getRegion().getBlocks().front();
164175

165-
// Collect iteration variable(s) allocations do that we can move them
176+
// Handle localization
177+
if (!loop.getLocalVars().empty()) {
178+
mlir::OpBuilder::InsertionGuard guard(rewriter);
179+
rewriter.setInsertionPointToStart(&loop.getRegion().front());
180+
181+
std::optional<mlir::ArrayAttr> localSyms = loop.getLocalSyms();
182+
183+
for (auto [localVar, localArg, localizerSym] : llvm::zip_equal(
184+
loop.getLocalVars(), loop.getRegionLocalArgs(), *localSyms)) {
185+
mlir::SymbolRefAttr localizerName =
186+
llvm::cast<mlir::SymbolRefAttr>(localizerSym);
187+
fir::LocalitySpecifierOp localizer = findLocalizer(loop, localizerName);
188+
189+
if (!localizer.getInitRegion().empty() ||
190+
!localizer.getDeallocRegion().empty())
191+
TODO(localizer.getLoc(), "localizers with `init` and `dealloc` "
192+
"regions are not handled yet.");
193+
194+
// TODO Should this be a heap allocation instead? For now, we allocate
195+
// on the stack for each loop iteration.
196+
mlir::Value localAlloc =
197+
rewriter.create<fir::AllocaOp>(loop.getLoc(), localizer.getType());
198+
199+
if (localizer.getLocalitySpecifierType() ==
200+
fir::LocalitySpecifierType::LocalInit) {
201+
// It is reasonable to make this assumption since, at this stage,
202+
// control-flow ops are not converted yet. Therefore, things like `if`
203+
// conditions will still be represented by their encapsulating `fir`
204+
// dialect ops.
205+
assert(localizer.getCopyRegion().hasOneBlock() &&
206+
"Expected localizer to have a single block.");
207+
mlir::Block *beforeLocalInit = rewriter.getInsertionBlock();
208+
mlir::Block *afterLocalInit = rewriter.splitBlock(
209+
rewriter.getInsertionBlock(), rewriter.getInsertionPoint());
210+
rewriter.cloneRegionBefore(localizer.getCopyRegion(), afterLocalInit);
211+
mlir::Block *copyRegionBody = beforeLocalInit->getNextNode();
212+
213+
rewriter.eraseOp(copyRegionBody->getTerminator());
214+
rewriter.mergeBlocks(afterLocalInit, copyRegionBody);
215+
rewriter.mergeBlocks(copyRegionBody, beforeLocalInit,
216+
{localVar, localArg});
217+
}
218+
219+
rewriter.replaceAllUsesWith(localArg, localAlloc);
220+
}
221+
222+
loop.getRegion().front().eraseArguments(loop.getNumInductionVars(),
223+
loop.getNumLocalOperands());
224+
loop.getLocalVarsMutable().clear();
225+
loop.setLocalSymsAttr(nullptr);
226+
}
227+
228+
// Collect iteration variable(s) allocations so that we can move them
166229
// outside the `fir.do_concurrent` wrapper.
167230
llvm::SmallVector<mlir::Operation *> opsToMove;
168231
for (mlir::Operation &op : llvm::drop_end(wrapperBlock))

flang/test/Transforms/do_concurrent-to-do_loop-unodered.fir

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,3 +121,64 @@ func.func @dc_2d_reduction(%i_lb: index, %i_ub: index, %i_st: index,
121121
// CHECK: }
122122
// CHECK: return
123123
// CHECK: }
124+
125+
// -----
126+
127+
fir.local {type = local} @local_localizer : i32 // `local` specifier over i32; declares no regions.
128+
129+
fir.local {type = local_init} @local_init_localizer : i32 copy { // `local_init` specifier over i32 with a `copy` region.
130+
^bb0(%arg0: !fir.ref<i32>, %arg1: !fir.ref<i32>): // %arg0 is read from, %arg1 is written to.
131+
%0 = fir.load %arg0 : !fir.ref<i32>
132+
fir.store %0 to %arg1 : !fir.ref<i32>
133+
fir.yield(%arg1 : !fir.ref<i32>)
134+
}
135+
136+
func.func @do_concurrent_locality_specs() { // Input: a do_concurrent loop with one `local` and one `local_init` variable.
137+
%3 = fir.alloca i32 {bindc_name = "local_init_var", uniq_name = "_QFdo_concurrentElocal_init_var"}
138+
%4:2 = hlfir.declare %3 {uniq_name = "_QFdo_concurrentElocal_init_var"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
139+
%5 = fir.alloca i32 {bindc_name = "local_var", uniq_name = "_QFdo_concurrentElocal_var"}
140+
%6:2 = hlfir.declare %5 {uniq_name = "_QFdo_concurrentElocal_var"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
141+
%c1 = arith.constant 1 : index
142+
%c10 = arith.constant 10 : index // loop upper bound
143+
fir.do_concurrent {
144+
%9 = fir.alloca i32 {bindc_name = "i"}
145+
%10:2 = hlfir.declare %9 {uniq_name = "_QFdo_concurrentEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
146+
fir.do_concurrent.loop (%arg0) = (%c1) to (%c10) step (%c1) local(@local_localizer %6#0 -> %arg1, @local_init_localizer %4#0 -> %arg2 : !fir.ref<i32>, !fir.ref<i32>) {
147+
%11 = fir.convert %arg0 : (index) -> i32
148+
fir.store %11 to %10#0 : !fir.ref<i32>
149+
%13:2 = hlfir.declare %arg1 {uniq_name = "_QFdo_concurrentElocal_var"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
150+
%15:2 = hlfir.declare %arg2 {uniq_name = "_QFdo_concurrentElocal_init_var"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
151+
%17 = fir.load %10#0 : !fir.ref<i32>
152+
%c5_i32 = arith.constant 5 : i32
153+
%18 = arith.cmpi slt, %17, %c5_i32 : i32
154+
fir.if %18 {
155+
%c42_i32 = arith.constant 42 : i32
156+
hlfir.assign %c42_i32 to %13#0 : i32, !fir.ref<i32>
157+
} else {
158+
%c84_i32 = arith.constant 84 : i32
159+
hlfir.assign %c84_i32 to %15#0 : i32, !fir.ref<i32>
160+
}
161+
}
162+
}
163+
return
164+
}
165+
166+
// CHECK-LABEL: func.func @do_concurrent_locality_specs() {
167+
// CHECK: %[[LOC_INIT_DECL:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "{{.*}}Elocal_init_var"}
168+
// CHECK: fir.do_loop %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} unordered {
169+
// Verify localization of the `local` var.
170+
// CHECK: %[[PRIV_LOC_ALLOC:.*]] = fir.alloca i32
171+
172+
// Verify localization of the `local_init` var.
173+
// CHECK: %[[PRIV_LOC_INIT_ALLOC:.*]] = fir.alloca i32
174+
// CHECK: %[[LOC_INIT_VAL:.*]] = fir.load %[[LOC_INIT_DECL]]#0 : !fir.ref<i32>
175+
// CHECK: fir.store %[[LOC_INIT_VAL]] to %[[PRIV_LOC_INIT_ALLOC]] : !fir.ref<i32>
176+
177+
// CHECK: %[[VAL_15:.*]]:2 = hlfir.declare %[[PRIV_LOC_ALLOC]]
178+
// CHECK: %[[VAL_16:.*]]:2 = hlfir.declare %[[PRIV_LOC_INIT_ALLOC]]
179+
180+
// CHECK: hlfir.assign %{{.*}} to %[[VAL_15]]#0 : i32, !fir.ref<i32>
181+
// CHECK: hlfir.assign %{{.*}} to %[[VAL_16]]#0 : i32, !fir.ref<i32>
182+
// CHECK: }
183+
// CHECK: return
184+
// CHECK: }

0 commit comments

Comments
 (0)