Skip to content

Commit 6073fde

Browse files
authored
[VPlan] Directly check if middle block is pred of scalar preheader. (llvm#191768)
hasScalarTail currently returns incorrect results when queried after runtime checks have been added. Generalize and harden the check by testing directly whether the middle block is a predecessor of the scalar preheader.
1 parent 53e01f1 commit 6073fde

File tree

2 files changed

+319
-5
lines changed

2 files changed

+319
-5
lines changed

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4959,12 +4959,12 @@ class VPlan {
49594959
(ExitBlocks.size() == 1 && ExitBlocks[0]->getNumPredecessors() > 1);
49604960
}
49614961

4962-
/// Returns true if the scalar tail may execute after the vector loop. Note
4963-
/// that this relies on unneeded branches to the scalar tail loop being
4964-
/// removed.
4962+
/// Returns true if the scalar tail may execute after the vector loop, i.e.
4963+
/// if the middle block is a predecessor of the scalar preheader. Note that
4964+
/// this relies on unneeded branches to the scalar tail loop being removed.
49654965
bool hasScalarTail() const {
4966-
return !(!getScalarPreheader()->hasPredecessors() ||
4967-
getScalarPreheader()->getSinglePredecessor() == getEntry());
4966+
return is_contained(getScalarPreheader()->getPredecessors(),
4967+
getMiddleBlock());
49684968
}
49694969
};
49704970

Lines changed: 314 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,314 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 6
2+
; RUN: opt -passes=loop-vectorize -force-vector-width=4 -force-target-supports-masked-memory-ops \
3+
; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \
4+
; RUN: -force-tail-folding-style=data-without-lane-mask -S %s | FileCheck %s
5+
6+
define void @tc_17_without_runtime_check(ptr noalias %A, ptr noalias %B) {
7+
; CHECK-LABEL: define void @tc_17_without_runtime_check(
8+
; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) {
9+
; CHECK-NEXT: [[ENTRY:.*:]]
10+
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
11+
; CHECK: [[VECTOR_PH]]:
12+
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
13+
; CHECK: [[VECTOR_BODY]]:
14+
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
15+
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
16+
; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <4 x i32> [[VEC_IND]], splat (i32 16)
17+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[A]], i32 [[INDEX]]
18+
; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[VEC_IND]], ptr align 4 [[TMP1]], <4 x i1> [[TMP0]])
19+
; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 17)
20+
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[B]], i32 [[INDEX]]
21+
; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP2]], ptr align 4 [[TMP3]], <4 x i1> [[TMP0]])
22+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
23+
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
24+
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 20
25+
; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
26+
; CHECK: [[MIDDLE_BLOCK]]:
27+
; CHECK-NEXT: br label %[[EXIT:.*]]
28+
; CHECK: [[EXIT]]:
29+
; CHECK-NEXT: ret void
30+
;
31+
entry:
32+
br label %loop
33+
34+
loop:
35+
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
36+
%gep.A = getelementptr i32, ptr %A, i32 %iv
37+
store i32 %iv, ptr %gep.A
38+
%v = add i32 %iv, 17
39+
%q = getelementptr i32, ptr %B, i32 %iv
40+
store i32 %v, ptr %q
41+
%iv.next = add i32 %iv, 1
42+
%ec = icmp eq i32 %iv.next, 17
43+
br i1 %ec, label %exit, label %loop
44+
45+
exit:
46+
ret void
47+
}
48+
49+
define void @tc_17_with_runtime_check(ptr %A, ptr %B) {
50+
; CHECK-LABEL: define void @tc_17_with_runtime_check(
51+
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) {
52+
; CHECK-NEXT: [[ENTRY:.*:]]
53+
; CHECK-NEXT: [[A2:%.*]] = ptrtoaddr ptr [[A]] to i64
54+
; CHECK-NEXT: [[B1:%.*]] = ptrtoaddr ptr [[B]] to i64
55+
; CHECK-NEXT: br label %[[VECTOR_MEMCHECK:.*]]
56+
; CHECK: [[VECTOR_MEMCHECK]]:
57+
; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[B1]], [[A2]]
58+
; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 16
59+
; CHECK-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
60+
; CHECK: [[VECTOR_PH]]:
61+
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
62+
; CHECK: [[VECTOR_BODY]]:
63+
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
64+
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
65+
; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <4 x i32> [[VEC_IND]], splat (i32 16)
66+
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[A]], i32 [[INDEX]]
67+
; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[VEC_IND]], ptr align 4 [[TMP2]], <4 x i1> [[TMP1]])
68+
; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 17)
69+
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[B]], i32 [[INDEX]]
70+
; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP3]], ptr align 4 [[TMP4]], <4 x i1> [[TMP1]])
71+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
72+
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
73+
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 20
74+
; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
75+
; CHECK: [[MIDDLE_BLOCK]]:
76+
; CHECK-NEXT: br label %[[EXIT:.*]]
77+
; CHECK: [[SCALAR_PH]]:
78+
; CHECK-NEXT: br label %[[LOOP:.*]]
79+
; CHECK: [[LOOP]]:
80+
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
81+
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i32 [[IV]]
82+
; CHECK-NEXT: store i32 [[IV]], ptr [[GEP_A]], align 4
83+
; CHECK-NEXT: [[V:%.*]] = add i32 [[IV]], 17
84+
; CHECK-NEXT: [[Q:%.*]] = getelementptr i32, ptr [[B]], i32 [[IV]]
85+
; CHECK-NEXT: store i32 [[V]], ptr [[Q]], align 4
86+
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
87+
; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], 17
88+
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP4:![0-9]+]]
89+
; CHECK: [[EXIT]]:
90+
; CHECK-NEXT: ret void
91+
;
92+
entry:
93+
br label %loop
94+
95+
loop:
96+
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
97+
%gep.A = getelementptr i32, ptr %A, i32 %iv
98+
store i32 %iv, ptr %gep.A
99+
%v = add i32 %iv, 17
100+
%q = getelementptr i32, ptr %B, i32 %iv
101+
store i32 %v, ptr %q
102+
%iv.next = add i32 %iv, 1
103+
%ec = icmp eq i32 %iv.next, 17
104+
br i1 %ec, label %exit, label %loop
105+
106+
exit:
107+
ret void
108+
}
109+
110+
define void @tc_20_without_runtime_checks(ptr noalias %A, ptr noalias %B) {
111+
; CHECK-LABEL: define void @tc_20_without_runtime_checks(
112+
; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) {
113+
; CHECK-NEXT: [[ENTRY:.*:]]
114+
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
115+
; CHECK: [[VECTOR_PH]]:
116+
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
117+
; CHECK: [[VECTOR_BODY]]:
118+
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
119+
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
120+
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[A]], i32 [[INDEX]]
121+
; CHECK-NEXT: store <4 x i32> [[VEC_IND]], ptr [[TMP0]], align 4
122+
; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 17)
123+
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[B]], i32 [[INDEX]]
124+
; CHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[TMP2]], align 4
125+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
126+
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
127+
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 20
128+
; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
129+
; CHECK: [[MIDDLE_BLOCK]]:
130+
; CHECK-NEXT: br label %[[EXIT:.*]]
131+
; CHECK: [[EXIT]]:
132+
; CHECK-NEXT: ret void
133+
;
134+
entry:
135+
br label %loop
136+
137+
loop:
138+
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
139+
%gep.A = getelementptr i32, ptr %A, i32 %iv
140+
store i32 %iv, ptr %gep.A
141+
%v = add i32 %iv, 17
142+
%q = getelementptr i32, ptr %B, i32 %iv
143+
store i32 %v, ptr %q
144+
%iv.next = add i32 %iv, 1
145+
%ec = icmp eq i32 %iv.next, 20
146+
br i1 %ec, label %exit, label %loop
147+
148+
exit:
149+
ret void
150+
}
151+
152+
define void @tc_20_with_runtime_checks(ptr %A, ptr %B) {
153+
; CHECK-LABEL: define void @tc_20_with_runtime_checks(
154+
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) {
155+
; CHECK-NEXT: [[ENTRY:.*:]]
156+
; CHECK-NEXT: [[A2:%.*]] = ptrtoaddr ptr [[A]] to i64
157+
; CHECK-NEXT: [[B1:%.*]] = ptrtoaddr ptr [[B]] to i64
158+
; CHECK-NEXT: br label %[[VECTOR_MEMCHECK:.*]]
159+
; CHECK: [[VECTOR_MEMCHECK]]:
160+
; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[B1]], [[A2]]
161+
; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 16
162+
; CHECK-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
163+
; CHECK: [[VECTOR_PH]]:
164+
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
165+
; CHECK: [[VECTOR_BODY]]:
166+
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
167+
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
168+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[A]], i32 [[INDEX]]
169+
; CHECK-NEXT: store <4 x i32> [[VEC_IND]], ptr [[TMP1]], align 4
170+
; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 17)
171+
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[B]], i32 [[INDEX]]
172+
; CHECK-NEXT: store <4 x i32> [[TMP2]], ptr [[TMP3]], align 4
173+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
174+
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
175+
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 20
176+
; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
177+
; CHECK: [[MIDDLE_BLOCK]]:
178+
; CHECK-NEXT: br label %[[EXIT:.*]]
179+
; CHECK: [[SCALAR_PH]]:
180+
; CHECK-NEXT: br label %[[LOOP:.*]]
181+
; CHECK: [[LOOP]]:
182+
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
183+
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i32 [[IV]]
184+
; CHECK-NEXT: store i32 [[IV]], ptr [[GEP_A]], align 4
185+
; CHECK-NEXT: [[V:%.*]] = add i32 [[IV]], 17
186+
; CHECK-NEXT: [[Q:%.*]] = getelementptr i32, ptr [[B]], i32 [[IV]]
187+
; CHECK-NEXT: store i32 [[V]], ptr [[Q]], align 4
188+
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
189+
; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], 20
190+
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
191+
; CHECK: [[EXIT]]:
192+
; CHECK-NEXT: ret void
193+
;
194+
entry:
195+
br label %loop
196+
197+
loop:
198+
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
199+
%gep.A = getelementptr i32, ptr %A, i32 %iv
200+
store i32 %iv, ptr %gep.A
201+
%v = add i32 %iv, 17
202+
%q = getelementptr i32, ptr %B, i32 %iv
203+
store i32 %v, ptr %q
204+
%iv.next = add i32 %iv, 1
205+
%ec = icmp eq i32 %iv.next, 20
206+
br i1 %ec, label %exit, label %loop
207+
208+
exit:
209+
ret void
210+
}
211+
212+
define void @tc_23_without_runtime_checks(ptr noalias %A, ptr noalias %B) {
213+
; CHECK-LABEL: define void @tc_23_without_runtime_checks(
214+
; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) {
215+
; CHECK-NEXT: [[ENTRY:.*:]]
216+
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
217+
; CHECK: [[VECTOR_PH]]:
218+
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
219+
; CHECK: [[VECTOR_BODY]]:
220+
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
221+
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
222+
; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <4 x i32> [[VEC_IND]], splat (i32 22)
223+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[A]], i32 [[INDEX]]
224+
; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[VEC_IND]], ptr align 4 [[TMP1]], <4 x i1> [[TMP0]])
225+
; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 17)
226+
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[B]], i32 [[INDEX]]
227+
; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP2]], ptr align 4 [[TMP3]], <4 x i1> [[TMP0]])
228+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
229+
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
230+
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 24
231+
; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
232+
; CHECK: [[MIDDLE_BLOCK]]:
233+
; CHECK-NEXT: br label %[[EXIT:.*]]
234+
; CHECK: [[EXIT]]:
235+
; CHECK-NEXT: ret void
236+
;
237+
entry:
238+
br label %loop
239+
240+
loop:
241+
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
242+
%gep.A = getelementptr i32, ptr %A, i32 %iv
243+
store i32 %iv, ptr %gep.A
244+
%v = add i32 %iv, 17
245+
%q = getelementptr i32, ptr %B, i32 %iv
246+
store i32 %v, ptr %q
247+
%iv.next = add i32 %iv, 1
248+
%ec = icmp eq i32 %iv.next, 23
249+
br i1 %ec, label %exit, label %loop
250+
251+
exit:
252+
ret void
253+
}
254+
255+
define void @tc_23_with_runtime_checks(ptr %A, ptr %B) {
256+
; CHECK-LABEL: define void @tc_23_with_runtime_checks(
257+
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) {
258+
; CHECK-NEXT: [[ENTRY:.*:]]
259+
; CHECK-NEXT: [[A2:%.*]] = ptrtoaddr ptr [[A]] to i64
260+
; CHECK-NEXT: [[B1:%.*]] = ptrtoaddr ptr [[B]] to i64
261+
; CHECK-NEXT: br label %[[VECTOR_MEMCHECK:.*]]
262+
; CHECK: [[VECTOR_MEMCHECK]]:
263+
; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[B1]], [[A2]]
264+
; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 16
265+
; CHECK-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
266+
; CHECK: [[VECTOR_PH]]:
267+
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
268+
; CHECK: [[VECTOR_BODY]]:
269+
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
270+
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
271+
; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <4 x i32> [[VEC_IND]], splat (i32 22)
272+
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[A]], i32 [[INDEX]]
273+
; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[VEC_IND]], ptr align 4 [[TMP2]], <4 x i1> [[TMP1]])
274+
; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 17)
275+
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[B]], i32 [[INDEX]]
276+
; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP3]], ptr align 4 [[TMP4]], <4 x i1> [[TMP1]])
277+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
278+
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
279+
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 24
280+
; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
281+
; CHECK: [[MIDDLE_BLOCK]]:
282+
; CHECK-NEXT: br label %[[EXIT:.*]]
283+
; CHECK: [[SCALAR_PH]]:
284+
; CHECK-NEXT: br label %[[LOOP:.*]]
285+
; CHECK: [[LOOP]]:
286+
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
287+
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i32 [[IV]]
288+
; CHECK-NEXT: store i32 [[IV]], ptr [[GEP_A]], align 4
289+
; CHECK-NEXT: [[V:%.*]] = add i32 [[IV]], 17
290+
; CHECK-NEXT: [[Q:%.*]] = getelementptr i32, ptr [[B]], i32 [[IV]]
291+
; CHECK-NEXT: store i32 [[V]], ptr [[Q]], align 4
292+
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
293+
; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], 23
294+
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP10:![0-9]+]]
295+
; CHECK: [[EXIT]]:
296+
; CHECK-NEXT: ret void
297+
;
298+
entry:
299+
br label %loop
300+
301+
loop:
302+
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
303+
%gep.A = getelementptr i32, ptr %A, i32 %iv
304+
store i32 %iv, ptr %gep.A
305+
%v = add i32 %iv, 17
306+
%q = getelementptr i32, ptr %B, i32 %iv
307+
store i32 %v, ptr %q
308+
%iv.next = add i32 %iv, 1
309+
%ec = icmp eq i32 %iv.next, 23
310+
br i1 %ec, label %exit, label %loop
311+
312+
exit:
313+
ret void
314+
}

0 commit comments

Comments
 (0)