Skip to content

Commit dbdf947

Browse files
committed
[AIE] ReservedRegsLICM: don't sink if reg is live at latch entry
In a multi-block loop with conditional branches, a candidate register may be defined on some paths to the latch but not on others. On the paths that skip the definition, the register is live at the latch entry, carrying a value from a previous iteration; today that value is the one written by the last def in the latch. Sinking that last def from the latch to the exit block would change which value reaches the instructions that use the register before the def in the latch: instead of the latch's own def, they would see a stale value left over from an earlier iteration that took a different path. Fix this by pre-computing LatchEntryLive — the set of registers live at the entry of the latch block — and rejecting any sink candidate whose register appears in that set. Add a test (srs_dyn_sign_conditional_no_sink) where $srssign0 is defined in one conditional branch but not the other, and the latch uses it before redefining it. The pass must leave both the COPY and the MOVX_mvx_cr_imm in place.
1 parent 3662cde commit dbdf947

2 files changed

Lines changed: 99 additions & 4 deletions

File tree

llvm/lib/Target/AIE/ReservedRegsLICM.cpp

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -261,22 +261,36 @@ void ReservedRegsLICM::runOnLoop(MachineLoop &L) {
261261

262262
void ReservedRegsLICM::processForExitSink(MachineLoop &L,
263263
const BitVector &ReservedLiveins) {
264+
// Pre-compute what's live at the entry of the latch block by walking it
265+
// fully backward. For multi-block loops with conditional paths, a candidate
266+
// register may be defined in some branches but not others, making it live at
267+
// the latch entry on those paths. If the register is live at the latch entry
268+
// (i.e., used before its first def in the latch), sinking its last def to
269+
// the exit block would expose the wrong value on those paths.
270+
assert(L.getLoopLatch());
271+
LivePhysRegs LatchEntryLive(*TRI);
272+
for (const MachineInstr &MI : reverse(*L.getLoopLatch()))
273+
LatchEntryLive.stepBackward(MI);
274+
264275
RegDefMap PhysRegChanged(*TRI);
265276
LivePhysRegs LiveRegs(*TRI);
266277
Candidates SinkCandidates;
267278

268279
// Walk the latch block, track defs for each register, and
269280
// collect potential LICM candidates.
270-
assert(L.getLoopLatch());
271281
for (MachineInstr &MI : reverse(*L.getLoopLatch())) {
272282
CandidateInfo *CandInfo = SinkCandidates.getInfo(MI);
273283

274284
// First time we meet a reserved reg definition while iterating upwards.
275-
// If that def is not a loop livein and it isn't used in this block either,
276-
// then one can move the instruction to the exit BB of the loop.
285+
// If that def is not a loop livein, it isn't used in this block after the
286+
// def, and it isn't live at the latch entry (which would indicate it is
287+
// used before its def in the latch, possibly carrying a value from a
288+
// conditional path that skips the def), then one can move the instruction
289+
// to the exit BB of the loop.
277290
if (CandInfo && !PhysRegChanged.hasChanged(CandInfo->DefinedReg) &&
278291
!ReservedLiveins.test(CandInfo->DefinedReg) &&
279-
!LiveRegs.contains(CandInfo->DefinedReg)) {
292+
!LiveRegs.contains(CandInfo->DefinedReg) &&
293+
!LatchEntryLive.contains(CandInfo->DefinedReg)) {
280294
assert(!CandInfo->HoistCandidate);
281295
CandInfo->HoistCandidate = &MI;
282296
}

llvm/test/CodeGen/AIE/aie2ps/reserved-reg-licm.mir

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -329,3 +329,84 @@ body: |
329329
PseudoJNZ %0, %bb.2
330330
PseudoJ_jump_imm %bb.1
331331
...
332+
333+
# $srssign0 is defined in one conditional branch (bb.2) but not the other
334+
# (bb.3). The latch (bb.4) uses $srssign0 before defining it, so $srssign0 is
335+
# live at the latch entry on the path through bb.3. Sinking MOVX_mvx_cr_imm 0
336+
# from the latch to the exit would expose the wrong value on that path.
337+
# Neither the COPY nor the MOVX should be moved.
338+
---
339+
name: srs_dyn_sign_conditional_no_sink
340+
tracksRegLiveness: true
341+
body: |
342+
; CHECK-LABEL: name: srs_dyn_sign_conditional_no_sink
343+
; CHECK: bb.0:
344+
; CHECK-NEXT: successors: %bb.1(0x80000000)
345+
; CHECK-NEXT: liveins: $p0, $r0, $r1, $dm0, $s0
346+
; CHECK-NEXT: {{ $}}
347+
; CHECK-NEXT: [[COPY:%[0-9]+]]:er = COPY $r0
348+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:ep = COPY $p0
349+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:er = COPY $r1
350+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:edm = COPY $dm0
351+
; CHECK-NEXT: [[COPY4:%[0-9]+]]:es = COPY $s0
352+
; CHECK-NEXT: PseudoJ_jump_imm %bb.1
353+
; CHECK-NEXT: {{ $}}
354+
; CHECK-NEXT: bb.1:
355+
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
356+
; CHECK-NEXT: {{ $}}
357+
; CHECK-NEXT: PseudoJNZ [[COPY]], %bb.3
358+
; CHECK-NEXT: {{ $}}
359+
; CHECK-NEXT: bb.2:
360+
; CHECK-NEXT: successors: %bb.4(0x80000000)
361+
; CHECK-NEXT: {{ $}}
362+
; CHECK-NEXT: $srssign0 = COPY [[COPY2]]
363+
; CHECK-NEXT: PseudoJ_jump_imm %bb.4
364+
; CHECK-NEXT: {{ $}}
365+
; CHECK-NEXT: bb.3:
366+
; CHECK-NEXT: successors: %bb.4(0x80000000)
367+
; CHECK-NEXT: {{ $}}
368+
; CHECK-NEXT: PseudoJ_jump_imm %bb.4
369+
; CHECK-NEXT: {{ $}}
370+
; CHECK-NEXT: bb.4:
371+
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.5(0x04000000)
372+
; CHECK-NEXT: {{ $}}
373+
; CHECK-NEXT: [[VSRS_4x_mv_x_srs_dm_srsSign0_:%[0-9]+]]:mxs = VSRS_4x_mv_x_srs_dm_srsSign0 [[COPY3]], [[COPY4]], implicit-def $srsrs_of, implicit $crrnd, implicit $crsrsmode, implicit $crsat, implicit $srssign0
374+
; CHECK-NEXT: $srssign0 = MOVX_mvx_cr_imm 0
375+
; CHECK-NEXT: PseudoJNZ [[COPY]], %bb.1
376+
; CHECK-NEXT: PseudoJ_jump_imm %bb.5
377+
; CHECK-NEXT: {{ $}}
378+
; CHECK-NEXT: bb.5:
379+
; CHECK-NEXT: PseudoRET implicit $lr
380+
bb.0:
381+
successors: %bb.1(0x80000000)
382+
liveins: $p0, $r0, $r1, $dm0, $s0
383+
%0:er = COPY $r0
384+
%1:ep = COPY $p0
385+
%2:er = COPY $r1
386+
%3:edm = COPY $dm0
387+
%4:es = COPY $s0
388+
PseudoJ_jump_imm %bb.1
389+
390+
bb.1:
391+
successors: %bb.2(0x40000000), %bb.3(0x40000000)
392+
PseudoJNZ %0, %bb.3
393+
394+
bb.2:
395+
successors: %bb.4(0x80000000)
396+
$srssign0 = COPY %2
397+
PseudoJ_jump_imm %bb.4
398+
399+
bb.3:
400+
successors: %bb.4(0x80000000)
401+
PseudoJ_jump_imm %bb.4
402+
403+
bb.4:
404+
successors: %bb.1(0x7c000000), %bb.5(0x04000000)
405+
%5:mxs = VSRS_4x_mv_x_srs_dm_srsSign0 %3, %4, implicit-def $srsrs_of, implicit $crrnd, implicit $crsrsmode, implicit $crsat, implicit $srssign0
406+
$srssign0 = MOVX_mvx_cr_imm 0
407+
PseudoJNZ %0, %bb.1
408+
PseudoJ_jump_imm %bb.5
409+
410+
bb.5:
411+
PseudoRET implicit $lr
412+
...

0 commit comments

Comments
 (0)