Skip to content

Commit 5d4946d

Browse files
committed
[SeparateConstOffsetFromGEP] Preserve inbounds flag based on ValueTracking
If we know that the initial GEP was inbounds, and we change it to a sequence of GEPs from the same base pointer where every offset is non-negative, then the new GEPs are inbounds. For SWDEV-516125.
1 parent be258a2 commit 5d4946d

File tree

4 files changed

+48
-17
lines changed

4 files changed

+48
-17
lines changed

Diff for: llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp

+13-5
Original file line numberDiff line numberDiff line change
@@ -1052,6 +1052,8 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
10521052
}
10531053
}
10541054

1055+
bool MayRecoverInbounds = AccumulativeByteOffset >= 0 && GEP->isInBounds();
1056+
10551057
// Remove the constant offset in each sequential index. The resultant GEP
10561058
// computes the variadic base.
10571059
// Notice that we don't remove struct field indices here. If LowerGEP is
@@ -1079,6 +1081,8 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
10791081
// and the old index if they are not used.
10801082
RecursivelyDeleteTriviallyDeadInstructions(UserChainTail);
10811083
RecursivelyDeleteTriviallyDeadInstructions(OldIdx);
1084+
MayRecoverInbounds =
1085+
MayRecoverInbounds && computeKnownBits(NewIdx, *DL).isNonNegative();
10821086
}
10831087
}
10841088
}
@@ -1100,11 +1104,15 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
11001104
// address with silently-wrapping two's complement arithmetic".
11011105
// Therefore, the final code will be semantically equivalent.
11021106
//
1103-
// TODO(jingyue): do some range analysis to keep as many inbounds as
1104-
// possible. GEPs with inbounds are more friendly to alias analysis.
1105-
// TODO(gep_nowrap): Preserve nuw at least.
1106-
GEPNoWrapFlags NewGEPFlags = GEPNoWrapFlags::none();
1107-
GEP->setNoWrapFlags(GEPNoWrapFlags::none());
1107+
// If the initial GEP was inbounds and all variable indices and the
1108+
// accumulated offsets are non-negative, they can be added in any order and
1109+
// the intermediate results are in bounds. So, we can preserve the inbounds
1110+
// flag for both GEPs. GEPs with inbounds are more friendly to alias analysis.
1111+
//
1112+
// TODO(gep_nowrap): Preserve nuw?
1113+
GEPNoWrapFlags NewGEPFlags =
1114+
MayRecoverInbounds ? GEPNoWrapFlags::inBounds() : GEPNoWrapFlags::none();
1115+
GEP->setNoWrapFlags(NewGEPFlags);
11081116

11091117
// Lowers a GEP to either GEPs with a single index or arithmetic operations.
11101118
if (LowerGEP) {

Diff for: llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/preserve-inbounds.ll

+23
Original file line numberDiff line numberDiff line change
@@ -16,3 +16,26 @@ entry:
1616
%arrayidx = getelementptr inbounds i32, ptr %p, i64 %idx
1717
ret ptr %arrayidx
1818
}
19+
20+
; All offsets must be non-negative, so inbounds can be preserved.
21+
define void @must_be_inbounds(ptr %dst, ptr %src, i32 %i) {
22+
; CHECK-LABEL: @must_be_inbounds(
23+
; CHECK-NEXT: entry:
24+
; CHECK-NEXT: [[I_PROM:%.*]] = zext i32 [[I:%.*]] to i64
25+
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i64 [[I_PROM]]
26+
; CHECK-NEXT: [[ARRAYIDX_SRC2:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 4
27+
; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[ARRAYIDX_SRC2]], align 4
28+
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[DST:%.*]], i64 [[I_PROM]]
29+
; CHECK-NEXT: [[ARRAYIDX_DST4:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 4
30+
; CHECK-NEXT: store float [[TMP1]], ptr [[ARRAYIDX_DST4]], align 4
31+
; CHECK-NEXT: ret void
32+
;
33+
entry:
34+
%i.prom = zext i32 %i to i64
35+
%idx = add nsw i64 %i.prom, 1
36+
%arrayidx.src = getelementptr inbounds float, ptr %src, i64 %idx
37+
%3 = load float, ptr %arrayidx.src, align 4
38+
%arrayidx.dst = getelementptr inbounds float, ptr %dst, i64 %idx
39+
store float %3, ptr %arrayidx.dst, align 4
40+
ret void
41+
}

Diff for: llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll

+8-8
Original file line numberDiff line numberDiff line change
@@ -157,19 +157,19 @@ define void @sum_of_array3(i32 %x, i32 %y, ptr nocapture %output) {
157157
; IR-NEXT: .preheader:
158158
; IR-NEXT: [[TMP0:%.*]] = zext i32 [[Y]] to i64
159159
; IR-NEXT: [[TMP1:%.*]] = zext i32 [[X]] to i64
160-
; IR-NEXT: [[TMP2:%.*]] = getelementptr [32 x [32 x float]], ptr addrspace(3) @array, i64 0, i64 [[TMP1]], i64 [[TMP0]]
160+
; IR-NEXT: [[TMP2:%.*]] = getelementptr inbounds [32 x [32 x float]], ptr addrspace(3) @array, i64 0, i64 [[TMP1]], i64 [[TMP0]]
161161
; IR-NEXT: [[TMP3:%.*]] = addrspacecast ptr addrspace(3) [[TMP2]] to ptr
162162
; IR-NEXT: [[TMP4:%.*]] = load float, ptr [[TMP3]], align 4
163163
; IR-NEXT: [[TMP5:%.*]] = fadd float [[TMP4]], 0.000000e+00
164-
; IR-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP2]], i64 4
164+
; IR-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP2]], i64 4
165165
; IR-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(3) [[TMP6]] to ptr
166166
; IR-NEXT: [[TMP8:%.*]] = load float, ptr [[TMP7]], align 4
167167
; IR-NEXT: [[TMP9:%.*]] = fadd float [[TMP5]], [[TMP8]]
168-
; IR-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP2]], i64 128
168+
; IR-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP2]], i64 128
169169
; IR-NEXT: [[TMP11:%.*]] = addrspacecast ptr addrspace(3) [[TMP10]] to ptr
170170
; IR-NEXT: [[TMP12:%.*]] = load float, ptr [[TMP11]], align 4
171171
; IR-NEXT: [[TMP13:%.*]] = fadd float [[TMP9]], [[TMP12]]
172-
; IR-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP2]], i64 132
172+
; IR-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP2]], i64 132
173173
; IR-NEXT: [[TMP15:%.*]] = addrspacecast ptr addrspace(3) [[TMP14]] to ptr
174174
; IR-NEXT: [[TMP16:%.*]] = load float, ptr [[TMP15]], align 4
175175
; IR-NEXT: [[TMP17:%.*]] = fadd float [[TMP13]], [[TMP16]]
@@ -224,19 +224,19 @@ define void @sum_of_array4(i32 %x, i32 %y, ptr nocapture %output) {
224224
; IR-NEXT: .preheader:
225225
; IR-NEXT: [[TMP0:%.*]] = zext i32 [[Y]] to i64
226226
; IR-NEXT: [[TMP1:%.*]] = zext i32 [[X]] to i64
227-
; IR-NEXT: [[TMP2:%.*]] = getelementptr [32 x [32 x float]], ptr addrspace(3) @array, i64 0, i64 [[TMP1]], i64 [[TMP0]]
227+
; IR-NEXT: [[TMP2:%.*]] = getelementptr inbounds [32 x [32 x float]], ptr addrspace(3) @array, i64 0, i64 [[TMP1]], i64 [[TMP0]]
228228
; IR-NEXT: [[TMP3:%.*]] = addrspacecast ptr addrspace(3) [[TMP2]] to ptr
229229
; IR-NEXT: [[TMP4:%.*]] = load float, ptr [[TMP3]], align 4
230230
; IR-NEXT: [[TMP5:%.*]] = fadd float [[TMP4]], 0.000000e+00
231-
; IR-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP2]], i64 4
231+
; IR-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP2]], i64 4
232232
; IR-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(3) [[TMP6]] to ptr
233233
; IR-NEXT: [[TMP8:%.*]] = load float, ptr [[TMP7]], align 4
234234
; IR-NEXT: [[TMP9:%.*]] = fadd float [[TMP5]], [[TMP8]]
235-
; IR-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP2]], i64 128
235+
; IR-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP2]], i64 128
236236
; IR-NEXT: [[TMP11:%.*]] = addrspacecast ptr addrspace(3) [[TMP10]] to ptr
237237
; IR-NEXT: [[TMP12:%.*]] = load float, ptr [[TMP11]], align 4
238238
; IR-NEXT: [[TMP13:%.*]] = fadd float [[TMP9]], [[TMP12]]
239-
; IR-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP2]], i64 132
239+
; IR-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP2]], i64 132
240240
; IR-NEXT: [[TMP15:%.*]] = addrspacecast ptr addrspace(3) [[TMP14]] to ptr
241241
; IR-NEXT: [[TMP16:%.*]] = load float, ptr [[TMP15]], align 4
242242
; IR-NEXT: [[TMP17:%.*]] = fadd float [[TMP13]], [[TMP16]]

Diff for: llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll

+4-4
Original file line numberDiff line numberDiff line change
@@ -372,8 +372,8 @@ define ptr @trunk_explicit(ptr %ptr, i64 %idx) {
372372
; CHECK-LABEL: define ptr @trunk_explicit(
373373
; CHECK-SAME: ptr [[PTR:%.*]], i64 [[IDX:%.*]]) {
374374
; CHECK-NEXT: entry:
375-
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT0:%.*]], ptr [[PTR]], i64 0, i32 3, i64 [[IDX]], i32 1
376-
; CHECK-NEXT: [[PTR21:%.*]] = getelementptr i8, ptr [[TMP0]], i64 3216
375+
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT0:%.*]], ptr [[PTR]], i64 0, i32 3, i64 [[IDX]], i32 1
376+
; CHECK-NEXT: [[PTR21:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 3216
377377
; CHECK-NEXT: ret ptr [[PTR21]]
378378
;
379379
entry:
@@ -389,8 +389,8 @@ define ptr @trunk_long_idx(ptr %ptr, i64 %idx) {
389389
; CHECK-LABEL: define ptr @trunk_long_idx(
390390
; CHECK-SAME: ptr [[PTR:%.*]], i64 [[IDX:%.*]]) {
391391
; CHECK-NEXT: entry:
392-
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT0:%.*]], ptr [[PTR]], i64 0, i32 3, i64 [[IDX]], i32 1
393-
; CHECK-NEXT: [[PTR21:%.*]] = getelementptr i8, ptr [[TMP0]], i64 3216
392+
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT0:%.*]], ptr [[PTR]], i64 0, i32 3, i64 [[IDX]], i32 1
393+
; CHECK-NEXT: [[PTR21:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 3216
394394
; CHECK-NEXT: ret ptr [[PTR21]]
395395
;
396396
entry:

0 commit comments

Comments
 (0)