Skip to content

Commit d784567

Browse files
committed
[AIEX][Solver] allow solver to also search NS+1 by default. Big guns are out, we may as well use them
1 parent fc52779 commit d784567

3 files changed

Lines changed: 34 additions & 24 deletions

File tree

llvm/lib/Target/AIE/AIEPostPipeliner.cpp

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1469,15 +1469,17 @@ bool PostPipeliner::tryApproaches() {
14691469
// Therefore, if we haven't found a solution yet, bring in the big guns.
14701470
if (II == TargetII) {
14711471
const SolverData Data = createSolverData();
1472-
int NS = MinLength / II;
1472+
const int NS = MinLength / II;
14731473
if (solve(Data, NS, false)) {
14741474
return true;
14751475
}
1476-
if (NS == MinTripCount) {
1477-
// Only try this at the boundary case
1478-
if (solve(Data, NS + 1, true)) {
1479-
return true;
1480-
}
1476+
// Let's try an SEF solution.
1477+
if (solve(Data, NS + 1, true)) {
1478+
return true;
1479+
}
1480+
// Hail Mary: last try with NS + 1.
1481+
if (solve(Data, NS + 1, false)) {
1482+
return true;
14811483
}
14821484
}
14831485

llvm/test/CodeGen/AIE/aie2ps/schedule/postpipeliner/maxpool-10instr-solver.mir

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,9 @@
1616
# MaxPool2D inner loop (10 instructions) with VSEL.
1717
# Similar to the 9-instr variant but with an extra VSEL_8 between
1818
# VSHIFT and VMAX_LT_8 in the compute chain.
19-
# The solver tries II=4 with a computed NS that is too small,
20-
# and the heuristic falls back to II=7.
19+
# The Z3 solver finds a solution at II=4, NS=4, but it fails
20+
# CheckFixedSchedule due to pipeline resource hazards.
21+
# Without the retry mechanism the heuristic falls back to II=7.
2122

2223
--- |
2324
target triple = "aie2ps"

llvm/test/CodeGen/AIE/aie2ps/schedule/postpipeliner/maxpool-9instr-solver.mir

Lines changed: 23 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -14,36 +14,43 @@
1414
# RUN: -o - | FileCheck %s
1515

1616
# MaxPool2D inner loop (9 instructions) without VSEL.
17-
# The solver tries II=4 with a computed NS that is too small,
18-
# and the heuristic falls back to II=7.
17+
# The Z3 solver uses the same NS as the heuristics (MinLength/II and MinLength/II + 1)
18+
# and finds a valid schedule at II=4.
1919

2020
--- |
2121
target triple = "aie2ps"
2222

2323
define void @maxpool_inner_9instr(ptr addrspace(5) noalias %data, ptr addrspace(6) noalias %weights, ptr addrspace(7) noalias %out, i32 %n) {
2424
; CHECK-LABEL: maxpool_inner_9instr:
2525
; CHECK: // %bb.0: // %preheader
26-
; CHECK-NEXT: nopa ; movs p5, p1; nopx ; mov p3, p0
27-
; CHECK-NEXT: vlda wl3, [p5], #32; vldb wh5, [p3, #32]; movs p7, p0; mov r22, p3
28-
; CHECK-NEXT: vldb.3d wl5, [p7], d0; and r24, r22, r20
29-
; CHECK-NEXT: movs p3, p7
30-
; CHECK-NEXT: nop
26+
; CHECK-NEXT: nopa ; nopb ; nopx ; mov p7, p0
27+
; CHECK-NEXT: vldb.3d wl5, [p7], d0; mov p3, p0
28+
; CHECK-NEXT: vldb wh5, [p3, #32]; mov r22, p3
29+
; CHECK-NEXT: mov p5, p1
30+
; CHECK-NEXT: vlda wl3, [p5], #32
31+
; CHECK-NEXT: vldb.3d wl5, [p7], d0; movs p3, p7
32+
; CHECK-NEXT: vldb wh5, [p3, #32]; and r24, r22, r20; mov r22, p3
3133
; CHECK-NEXT: movxm ls, #.LBB0_1
32-
; CHECK-NEXT: movxm le, #.L_LEnd0
33-
; CHECK-NEXT: nopa ; nopb ; nops ; add.nc lc, r2, #-1; mov p6, p2; nopv
34+
; CHECK-NEXT: vlda wl3, [p5], #32; movxm le, #.L_LEnd0
35+
; CHECK-NEXT: nopa ; vldb.3d wl5, [p7], d0; movs p3, p7; add.nc lc, r2, #-3; vshift x1, x5, x0, r24; nopv
36+
; CHECK-NEXT: nopa ; vldb wh5, [p3, #32]; nops ; and r24, r22, r20; mov r22, p3; nopv
37+
; CHECK-NEXT: nopa ; nopb ; nops ; nopxm ; nopv
38+
; CHECK-NEXT: vlda wl3, [p5], #32; nopb ; movs p6, p2; nopx ; vmax_lt.8 x10, r17:r16, x3, x1, vaddsign1; nopv
3439
; CHECK-NEXT: .LBB0_1: // %loop_body
3540
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
36-
; CHECK-NEXT: vlda wl3, [p5], #32; vldb wh5, [p3, #32]; nops ; nopx ; mov r22, p3; nopv
37-
; CHECK-NEXT: nopa ; vldb.3d wl5, [p7], d0; nops ; and r24, r22, r20; vshift x1, x5, x0, r24; nopv
38-
; CHECK-NEXT: nopa ; nopb ; movs p3, p7; nopx ; vmax_lt.8 x10, r17:r16, x3, x1, vaddsign1; nopv
39-
; CHECK-NEXT: nopa ; nopb ; nops ; nopxm ; nopv
40-
; CHECK-NEXT: nopa ; nopb ; vst wl10, [p6], #32; nopxm ; nopv
41+
; CHECK-NEXT: nopa ; vldb.3d wl5, [p7], d0; movs p3, p7; nopx ; vshift x1, x5, x0, r24; nopv
42+
; CHECK-NEXT: nopa ; vldb wh5, [p3, #32]; vst wl10, [p6], #32; and r24, r22, r20; mov r22, p3; nopv
4143
; CHECK-NEXT: nopa ; nopb ; nops ; nopxm ; nopv
4244
; CHECK-NEXT: .L_LEnd0:
43-
; CHECK-NEXT: nopa ; nopb ; nops ; nopxm ; nopv
45+
; CHECK-NEXT: vlda wl3, [p5], #32; nopb ; nops ; nopx ; vmax_lt.8 x10, r17:r16, x3, x1, vaddsign1; nopv
4446
; CHECK-NEXT: // %bb.2: // %exit
45-
; CHECK-NEXT: nopx
47+
; CHECK-NEXT: nopa ; nopb ; movs p3, p7; nopx ; vshift x1, x5, x0, r24; nopv
48+
; CHECK-NEXT: vst wl10, [p6], #32; nopb ; and r24, r22, r20
49+
; CHECK-NEXT: nop
50+
; CHECK-NEXT: vmax_lt.8 x10, r17:r16, x3, x1, vaddsign1
4651
; CHECK-NEXT: vshift x1, x5, x0, r24
52+
; CHECK-NEXT: vst wl10, [p6], #32
53+
; CHECK-NEXT: nop
4754
; CHECK-NEXT: vmax_lt.8 x10, r17:r16, x3, x1, vaddsign1
4855
; CHECK-NEXT: nop
4956
; CHECK-NEXT: vst wl10, [p6], #32

0 commit comments

Comments
 (0)