Skip to content

Commit 8dd91f4

Browse files
Extend ZOL to recognize static bounded loops
1 parent 85e2abc commit 8dd91f4

File tree

5 files changed

+106
-12
lines changed

5 files changed

+106
-12
lines changed

llvm/include/llvm/Transforms/Utils/LoopUtils.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -287,7 +287,8 @@ MDNode *updateIterCounts(LLVMContext &Context, MDNode *LoopID,
287287
std::optional<int64_t> getMinTripCount(const MDNode *LoopID);
288288

289289
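/// Get the minimum trip count of the loop: from a constant backedge-taken count computed by \p SE (if given), otherwise from the loop metadata.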
290-
std::optional<int64_t> getMinTripCount(const Loop *L);
290+
std::optional<int64_t> getMinTripCount(const Loop *L,
291+
ScalarEvolution *SE = nullptr);
291292

292293
/// Look for the loop attribute that disables all transformation heuristic.
293294
bool hasDisableAllTransformsHint(const Loop *L);

llvm/lib/Target/AIE/AIE2TargetTransformInfo.cpp

+5-10
Original file line numberDiff line numberDiff line change
@@ -119,19 +119,14 @@ bool AIE2TTIImpl::isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
119119
return false;
120120

121121
if (!ForceHLGeneration) {
122-
if (const MDNode *LoopID = L->getLoopID()) {
123-
std::optional<int64_t> MinTripCount = getMinTripCount(LoopID);
124-
if (MinTripCount) {
125-
// Reject HL for this case.
126-
if (*MinTripCount <= MinIterCountHLReject) {
127-
return false;
128-
}
129-
} else {
130-
// We have metadata, but not iteration information.
122+
std::optional<int64_t> MinTripCount = getMinTripCount(L, &SE);
123+
if (MinTripCount) {
124+
// Reject HL for this case.
125+
if (*MinTripCount <= MinIterCountHLReject) {
131126
return false;
132127
}
133128
} else {
134-
// We don't have metadata.
129+
// No minimum trip count is known (neither a constant SCEV count nor loop metadata iteration information).
135130
return false;
136131
}
137132
}

llvm/lib/Transforms/Utils/LoopUtils.cpp

+9-1
Original file line numberDiff line numberDiff line change
@@ -447,7 +447,15 @@ std::optional<int64_t> llvm::getMinTripCount(const MDNode *LoopID) {
447447
return std::nullopt;
448448
}
449449

450-
std::optional<int64_t> llvm::getMinTripCount(const Loop *L) {
450+
std::optional<int64_t> llvm::getMinTripCount(const Loop *L,
451+
ScalarEvolution *SE) {
452+
if (SE && L->isRotatedForm()) {
453+
if (SE->hasLoopInvariantBackedgeTakenCount(L)) {
454+
const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
455+
if (const SCEVConstant *CT = dyn_cast<SCEVConstant>(BackedgeTakenCount))
456+
return CT->getValue()->getSExtValue() + 1;
457+
}
458+
}
451459
return getMinTripCount(L->getLoopID());
452460
}
453461

llvm/test/CodeGen/AIE/aie2/hardware-loops/zol-loop.ll

+42
Original file line numberDiff line numberDiff line change
@@ -72,3 +72,45 @@ for.body: ; preds = %entry, %for.body
7272
%exitcond.not = icmp eq i32 %inc, %n
7373
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
7474
}
75+
76+
define i32 @static_bounded_loop(i32 %num) {
77+
; CHECK-LABEL: static_bounded_loop:
78+
; CHECK: .p2align 4
79+
; CHECK-NEXT: // %bb.0: // %entry
80+
; CHECK-NEXT: nopb ; nopa ; nops ; movxm ls, #.LBB1_1; nopv
81+
; CHECK-NEXT: mova r2, #64; nopb ; movxm le, #.L_LEnd1
82+
; CHECK-NEXT: add.nc lc, r2, #0
83+
; CHECK-NEXT: nopb ; nopa ; nops ; nopxm ; nopv
84+
; CHECK-NEXT: nopb ; nopa ; nops ; nopxm ; nopv
85+
; CHECK-NEXT: nopb ; nopa ; nops ; nopxm ; nopv
86+
; CHECK-NEXT: nopb ; nopa ; nops ; nopxm ; nopv
87+
; CHECK-NEXT: nopb ; nopa ; nops ; nopxm ; nopv
88+
; CHECK-NEXT: nopb ; nopa ; nops ; nopxm ; nopv
89+
; CHECK-NEXT: nopb ; nopa ; nops ; nopx ; mov r0, r1; nopv
90+
; CHECK-NEXT: .p2align 4
91+
; CHECK-NEXT: .LBB1_1: // %for.body
92+
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
93+
; CHECK-NEXT: nopb ; nopa ; nops ; mul r0, r0, r0; nopm ; nopv
94+
; CHECK-NEXT: .L_LEnd1:
95+
; CHECK-NEXT: nopb ; nopa ; nops ; nopxm ; nopv
96+
; CHECK-NEXT: // %bb.2: // %for.cond.cleanup
97+
; CHECK-NEXT: nopa ; ret lr
98+
; CHECK-NEXT: nop // Delay Slot 5
99+
; CHECK-NEXT: nop // Delay Slot 4
100+
; CHECK-NEXT: nop // Delay Slot 3
101+
; CHECK-NEXT: nop // Delay Slot 2
102+
; CHECK-NEXT: nop // Delay Slot 1
103+
entry:
104+
br label %for.body
105+
106+
for.cond.cleanup: ; preds = %for.body
107+
ret i32 %mul
108+
109+
for.body: ; preds = %entry, %for.body
110+
%i.05 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
111+
%x.04 = phi i32 [ %num, %entry ], [ %mul, %for.body ]
112+
%mul = mul nsw i32 %x.04, %x.04
113+
%inc = add nuw nsw i32 %i.05, 1
114+
%exitcond.not = icmp eq i32 %inc, 64
115+
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
116+
}

llvm/test/CodeGen/AIE/aie2/hardware-loops/zol-loop.mir

+48
Original file line numberDiff line numberDiff line change
@@ -80,3 +80,51 @@ body: |
8080
PseudoRET implicit $lr
8181
8282
...
83+
84+
---
85+
name: static_bounded_loop
86+
tracksRegLiveness: true
87+
body: |
88+
; CHECK-LABEL: name: static_bounded_loop
89+
; CHECK: bb.0.entry (align 16):
90+
; CHECK-NEXT: successors: %bb.1(0x80000000)
91+
; CHECK-NEXT: liveins: $r1
92+
; CHECK-NEXT: {{ $}}
93+
; CHECK-NEXT: renamable $r2 = MOV_RLC_imm10_pseudo 64
94+
; CHECK-NEXT: $r0 = MOV_SCL_pseudo $r1
95+
; CHECK-NEXT: $lc = ADD_NC $r2, 0
96+
; CHECK-NEXT: $ls = MOVXM_lng_cg %bb.1
97+
; CHECK-NEXT: $le = MOVXM_lng_cg <mcsymbol .L_LEnd0>
98+
; CHECK-NEXT: {{ $}}
99+
; CHECK-NEXT: bb.1 (align 16):
100+
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
101+
; CHECK-NEXT: liveins: $r0
102+
; CHECK-NEXT: {{ $}}
103+
; CHECK-NEXT: renamable $r0 = nsw MUL_mul_r_rr killed renamable $r0, renamable $r0
104+
; CHECK-NEXT: PseudoLoopEnd <mcsymbol .L_LEnd0>, %bb.1
105+
; CHECK-NEXT: {{ $}}
106+
; CHECK-NEXT: bb.2:
107+
; CHECK-NEXT: liveins: $r0
108+
; CHECK-NEXT: {{ $}}
109+
; CHECK-NEXT: PseudoRET implicit $lr, implicit $r0
110+
bb.0.entry (align 16):
111+
successors: %bb.1(0x80000000)
112+
liveins: $r1
113+
114+
renamable $r2 = MOV_RLC_imm10_pseudo 64
115+
$r0 = MOV_SCL_pseudo $r1
116+
LoopStart killed renamable $r2, 0
117+
118+
bb.1 (align 16):
119+
successors: %bb.1(0x7c000000), %bb.2(0x04000000)
120+
liveins: $r0
121+
122+
renamable $r0 = nsw MUL_mul_r_rr killed renamable $r0, renamable $r0
123+
PseudoLoopEnd <mcsymbol .L_LEnd0>, %bb.1
124+
125+
bb.2:
126+
liveins: $r0
127+
128+
PseudoRET implicit $lr, implicit $r0
129+
130+
...

0 commit comments

Comments
 (0)