Skip to content

Commit 4d4609e

Browse files
committed
[InstCombine][AIEX] Add load->trunc->inttoptr combine pattern
InstCombine optimization for the load->trunc->inttoptr pattern on AIE. The AIE target has 20-bit pointers, which are stored as i24 (3 bytes). This optimization converts "load i24 -> trunc i20 -> inttoptr" into "load ptr -> ptrtoint -> zext i24". This allows SROA to recognize pointer-only allocas and promote them.
1 parent 30e6a3b commit 4d4609e

7 files changed

Lines changed: 564 additions & 382 deletions

File tree

clang/test/CodeGen/aie/aie2p/aie2p-stream-intrinsics.cpp

Lines changed: 120 additions & 121 deletions
Large diffs are not rendered by default.

clang/test/CodeGen/aie/common-tests/aie-stream-intrinsics.cpp

Lines changed: 238 additions & 245 deletions
Large diffs are not rendered by default.

llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp

Lines changed: 97 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
// See https://llvm.org/LICENSE.txt for license information.
55
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
66
//
7-
// Modifications (c) Copyright 2024 Advanced Micro Devices, Inc. or its
7+
// Modifications (c) Copyright 2024-2026 Advanced Micro Devices, Inc. or its
88
// affiliates
99
//
1010
//===----------------------------------------------------------------------===//
@@ -987,6 +987,98 @@ static bool canSimplifyNullStoreOrGEP(StoreInst &SI) {
987987
!NullPointerIsDefined(SI.getFunction(), SI.getPointerAddressSpace()));
988988
}
989989

990+
/// Optimize loads that feed trunc->inttoptr patterns on targets with
991+
/// non-byte-aligned pointer sizes.
992+
///
993+
/// Pattern: load iN -> trunc iM -> inttoptr ptr (where iM is pointer size)
994+
/// Transform: load iN -> (zext (ptrtoint (load ptr)))
995+
///
996+
/// This transformation is only applied when all uses of the load only
997+
/// access the low PtrSize bits, ensuring the transformation is safe.
998+
///
999+
/// Safety is verified by checking each use:
1000+
/// - Truncations to PtrSize or smaller bits are safe
1001+
/// - And operations with masks that don't touch upper bits are safe
1002+
/// - Other uses are conservatively rejected
1003+
///
1004+
/// Note: We manually check specific patterns rather than using DemandedBits
1005+
/// analysis because DemandedBits/SimplifyDemandedBits would modify the IR,
1006+
/// and we need a read-only check here. This approach could be extended to
1007+
/// handle additional safe patterns in the future (e.g., shifts, specific
1008+
/// arithmetic operations with known ranges).
1009+
///
1010+
/// \returns the replacement value if optimization was applied, nullptr
1011+
/// otherwise.
1012+
static Value *foldLoadWithTruncIntToPtrPattern(InstCombinerImpl &IC,
1013+
LoadInst &LI) {
1014+
if (!LI.getType()->isIntegerTy())
1015+
return nullptr;
1016+
1017+
const unsigned LoadWidth = LI.getType()->getIntegerBitWidth();
1018+
const unsigned AS = LI.getPointerAddressSpace();
1019+
const DataLayout &DL = IC.getDataLayout();
1020+
const unsigned PtrSize = DL.getPointerSizeInBits(AS);
1021+
1022+
// Check if pattern exists: trunc to PtrSize -> inttoptr
1023+
bool HasPattern = false;
1024+
for (const User *U : LI.users()) {
1025+
if (const auto *Trunc = dyn_cast<TruncInst>(U)) {
1026+
if (Trunc->getDestTy()->getIntegerBitWidth() == PtrSize) {
1027+
for (const User *TU : Trunc->users()) {
1028+
if (isa<IntToPtrInst>(TU)) {
1029+
HasPattern = true;
1030+
break;
1031+
}
1032+
}
1033+
}
1034+
}
1035+
if (HasPattern)
1036+
break;
1037+
}
1038+
1039+
if (!HasPattern)
1040+
return nullptr;
1041+
1042+
// Safety check: verify all uses only need low PtrSize bits
1043+
const APInt UpperBitsMask =
1044+
APInt::getHighBitsSet(LoadWidth, LoadWidth - PtrSize);
1045+
1046+
for (const User *LoadUser : LI.users()) {
1047+
// Trunc to PtrSize or smaller is safe
1048+
if (const auto *T = dyn_cast<TruncInst>(LoadUser)) {
1049+
if (T->getDestTy()->getIntegerBitWidth() > PtrSize)
1050+
return nullptr; // Early return: unsafe trunc
1051+
continue;
1052+
}
1053+
1054+
// And with mask that doesn't touch upper bits is safe
1055+
if (const auto *And = dyn_cast<BinaryOperator>(LoadUser)) {
1056+
if (And->getOpcode() == Instruction::And) {
1057+
if (const auto *C = dyn_cast<ConstantInt>(And->getOperand(1))) {
1058+
if ((C->getValue() & UpperBitsMask).isZero())
1059+
continue; // Safe
1060+
}
1061+
}
1062+
}
1063+
1064+
// Other uses - conservatively reject
1065+
return nullptr; // Early return: unsafe use
1066+
}
1067+
1068+
// All checks passed - apply transformation
1069+
Type *PtrTy = PointerType::get(LI.getContext(), AS);
1070+
LoadInst *PtrLoad =
1071+
new LoadInst(PtrTy, LI.getPointerOperand(), LI.getName(), LI.isVolatile(),
1072+
LI.getAlign(), LI.getOrdering(), LI.getSyncScopeID());
1073+
PtrLoad->setDebugLoc(LI.getDebugLoc());
1074+
PtrLoad->copyMetadata(LI);
1075+
IC.InsertNewInstWith(PtrLoad, LI.getIterator());
1076+
1077+
Value *PtrToInt = IC.Builder.CreatePtrToInt(
1078+
PtrLoad, IntegerType::get(LI.getContext(), PtrSize));
1079+
return IC.Builder.CreateZExt(PtrToInt, LI.getType());
1080+
}
1081+
9901082
static bool canSimplifyNullLoadOrGEP(LoadInst &LI, Value *Op) {
9911083
if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
9921084
const Value *GEPI0 = GEPI->getOperand(0);
@@ -1002,6 +1094,10 @@ static bool canSimplifyNullLoadOrGEP(LoadInst &LI, Value *Op) {
10021094
}
10031095

10041096
Instruction *InstCombinerImpl::visitLoadInst(LoadInst &LI) {
1097+
// Optimize integer loads that feed trunc->inttoptr pattern
1098+
if (Value *V = foldLoadWithTruncIntToPtrPattern(*this, LI))
1099+
return replaceInstUsesWith(LI, V);
1100+
10051101
Value *Op = LI.getOperand(0);
10061102
if (Value *Res = simplifyLoadInst(&LI, Op, SQ.getWithInstruction(&LI)))
10071103
return replaceInstUsesWith(LI, Res);
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
;
3+
; This file is licensed under the Apache License v2.0 with LLVM Exceptions.
4+
; See https://llvm.org/LICENSE.txt for license information.
5+
; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
;
7+
; (c) Copyright 2026 Advanced Micro Devices, Inc. or its affiliates
8+
;
9+
; RUN: opt -S -passes=instcombine -mtriple=aie2p %s | FileCheck %s
10+
; RUN: opt -S -passes=instcombine -mtriple=aie2ps %s | FileCheck %s
11+
12+
; Test InstCombine optimization for load->trunc->inttoptr pattern on aie
13+
; The aie target has 20-bit pointers, which are stored as i24 (3 bytes)
14+
; This optimization converts: load i24 -> trunc i20 -> inttoptr
15+
; Into: load ptr -> ptrtoint -> zext i24
16+
; This allows SROA to recognize pointer-only allocas and promote them
17+
18+
; Basic test: load i24 -> trunc i20 -> inttoptr should be optimized
; The trunc is the load's only user and the resulting pointer is returned, so
; the whole chain is live. The expected output is a single pointer load that
; is returned directly, with no integer casts left behind.
19+
define ptr @test_load_trunc_inttoptr_basic(ptr %p) {
20+
; CHECK-LABEL: @test_load_trunc_inttoptr_basic(
21+
; CHECK-NEXT: [[LOAD12:%.*]] = load ptr, ptr [[P:%.*]], align 4
22+
; CHECK-NEXT: ret ptr [[LOAD12]]
23+
;
24+
%load = load i24, ptr %p, align 4
25+
%trunc = trunc i24 %load to i20
26+
%ptr = inttoptr i20 %trunc to ptr
27+
ret ptr %ptr
28+
}
29+
30+
; Test with safe additional use (and with small mask that doesn't touch upper bits)
; The expected output is identical to the basic test: one pointer load that
; is returned directly.
; NOTE(review): %and has no users. InstCombine presumably deletes it as dead
; code before the load is visited, in which case the and-mask branch of the
; safety check is never reached here. Consider giving %and a live use (e.g.
; store it or return it alongside the pointer) - TODO confirm.
31+
define ptr @test_load_trunc_inttoptr_with_and(ptr %p) {
32+
; CHECK-LABEL: @test_load_trunc_inttoptr_with_and(
33+
; CHECK-NEXT: [[LOAD12:%.*]] = load ptr, ptr [[P:%.*]], align 4
34+
; CHECK-NEXT: ret ptr [[LOAD12]]
35+
;
36+
%load = load i24, ptr %p, align 4
37+
%and = and i24 %load, 31 ; Only uses low 5 bits - safe
38+
%trunc = trunc i24 %load to i20
39+
%ptr = inttoptr i20 %trunc to ptr
40+
ret ptr %ptr
41+
}
42+
43+
; Test with multiple safe truncs (all to <= 20 bits)
; The expected output is identical to the basic test: one pointer load that
; is returned directly.
; NOTE(review): %trunc1 has no users. InstCombine presumably deletes it as
; dead code before the load is visited, so the load may only ever be seen
; with the single i20 trunc. Consider giving %trunc1 a live use - TODO
; confirm.
44+
define ptr @test_load_trunc_inttoptr_multiple_safe_truncs(ptr %p) {
45+
; CHECK-LABEL: @test_load_trunc_inttoptr_multiple_safe_truncs(
46+
; CHECK-NEXT: [[LOAD12:%.*]] = load ptr, ptr [[P:%.*]], align 4
47+
; CHECK-NEXT: ret ptr [[LOAD12]]
48+
;
49+
%load = load i24, ptr %p, align 4
50+
%trunc1 = trunc i24 %load to i16 ; Safe: 16 <= 20
51+
%trunc2 = trunc i24 %load to i20 ; Safe: 20 <= 20
52+
%ptr = inttoptr i20 %trunc2 to ptr
53+
ret ptr %ptr
54+
}
55+
56+
; Negative test: unsafe trunc (> 20 bits) should prevent optimization
; The expected output keeps the i24 load and the i22 trunc that is returned.
; NOTE(review): %ptr has no users, so the trunc-to-i20 -> inttoptr chain is
; dead and is presumably removed before the load is visited. If so, the fold
; bails out because the pattern is absent, not because the i22 trunc was
; judged unsafe, and this test would still pass with the safety check
; removed. Consider keeping %ptr alive (e.g. store it through a second
; pointer argument) - TODO confirm.
57+
define i22 @test_load_trunc_inttoptr_unsafe_trunc(ptr %p) {
58+
; CHECK-LABEL: @test_load_trunc_inttoptr_unsafe_trunc(
59+
; CHECK-NEXT: [[LOAD:%.*]] = load i24, ptr [[P:%.*]], align 4
60+
; CHECK-NEXT: [[TRUNC_UNSAFE:%.*]] = trunc i24 [[LOAD]] to i22
61+
; CHECK-NEXT: ret i22 [[TRUNC_UNSAFE]]
62+
;
63+
%load = load i24, ptr %p, align 4
64+
%trunc_unsafe = trunc i24 %load to i22 ; Unsafe: 22 > 20
65+
%trunc = trunc i24 %load to i20
66+
%ptr = inttoptr i20 %trunc to ptr
67+
ret i22 %trunc_unsafe ; Use the unsafe trunc
68+
}
69+
70+
; Negative test: and with mask that touches upper bits should prevent optimization
; The mask 16777215 is 0xFFFFFF, the all-ones i24 value, so %and is equivalent
; to %load; the expected output returns the i24 load directly.
; NOTE(review): %ptr has no users, so the trunc-to-i20 -> inttoptr chain is
; dead and is presumably removed before the load is visited; the fold would
; then bail out at pattern detection without inspecting the mask. Consider
; keeping %ptr alive and using a mask that is not all-ones but still sets a
; bit above bit 19 (e.g. 1048576) - TODO confirm.
71+
define i24 @test_load_trunc_inttoptr_unsafe_and(ptr %p) {
72+
; CHECK-LABEL: @test_load_trunc_inttoptr_unsafe_and(
73+
; CHECK-NEXT: [[LOAD:%.*]] = load i24, ptr [[P:%.*]], align 4
74+
; CHECK-NEXT: ret i24 [[LOAD]]
75+
;
76+
%load = load i24, ptr %p, align 4
77+
%and = and i24 %load, 16777215 ; Uses all 24 bits including upper bits
78+
%trunc = trunc i24 %load to i20
79+
%ptr = inttoptr i20 %trunc to ptr
80+
ret i24 %and ; Use the and result
81+
}
82+
83+
; Negative test: other use type should prevent optimization
; The expected output keeps the i24 load and the add that is returned.
; NOTE(review): %ptr has no users, so the trunc-to-i20 -> inttoptr chain is
; dead and is presumably removed before the load is visited; the fold would
; then bail out at pattern detection without ever classifying the add.
; Consider keeping %ptr alive (e.g. store it through a second pointer
; argument) - TODO confirm.
84+
define i24 @test_load_trunc_inttoptr_other_use(ptr %p) {
85+
; CHECK-LABEL: @test_load_trunc_inttoptr_other_use(
86+
; CHECK-NEXT: [[LOAD:%.*]] = load i24, ptr [[P:%.*]], align 4
87+
; CHECK-NEXT: [[ADD:%.*]] = add i24 [[LOAD]], 1
88+
; CHECK-NEXT: ret i24 [[ADD]]
89+
;
90+
%load = load i24, ptr %p, align 4
91+
%add = add i24 %load, 1 ; Unknown use - conservatively unsafe
92+
%trunc = trunc i24 %load to i20
93+
%ptr = inttoptr i20 %trunc to ptr
94+
ret i24 %add ; Use the add result
95+
}

llvm/test/Transforms/InstCombine/indexed-gep-compares.ll

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,12 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
;
3+
; Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
; See https://llvm.org/LICENSE.txt for license information.
5+
; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
;
7+
; Modifications (c) Copyright 2024-2026 Advanced Micro Devices, Inc. or its
8+
; affiliates
9+
210
; RUN: opt -passes='instcombine<no-verify-fixpoint>' -S < %s | FileCheck %s
311

412
; In many of these tests nuw can be inferred on the sunk GEP in the exit
@@ -373,15 +381,8 @@ declare i32 @__gxx_personality_v0(...)
373381
define i1 @test8(ptr %in, i64 %offset) {
374382
; CHECK-LABEL: @test8(
375383
; CHECK-NEXT: entry:
376-
; CHECK-NEXT: [[LD:%.*]] = load i64, ptr [[IN:%.*]], align 8
377-
; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[LD]] to i32
378-
; CHECK-NEXT: [[CASTI8:%.*]] = inttoptr i32 [[TMP0]] to ptr
379-
; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[OFFSET:%.*]] to i32
380-
; CHECK-NEXT: [[GEPI8:%.*]] = getelementptr inbounds i8, ptr [[CASTI8]], i32 [[TMP1]]
381-
; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[LD]] to i32
382-
; CHECK-NEXT: [[PTRCAST:%.*]] = inttoptr i32 [[TMP2]] to ptr
383-
; CHECK-NEXT: [[GEPI32:%.*]] = getelementptr inbounds nuw i8, ptr [[PTRCAST]], i32 4
384-
; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr [[GEPI32]], [[GEPI8]]
384+
; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[LD:%.*]] to i32
385+
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP2]], 4
385386
; CHECK-NEXT: ret i1 [[CMP]]
386387
;
387388
entry:

llvm/test/Transforms/InstCombine/load-bitcast32.ll

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
; See https://llvm.org/LICENSE.txt for license information.
55
; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
66
;
7-
; Modifications (c) Copyright 2024 Advanced Micro Devices, Inc. or its
7+
; Modifications (c) Copyright 2024-2026 Advanced Micro Devices, Inc. or its
88
; affiliates
99

1010
; RUN: opt -passes=instcombine -S < %s | FileCheck %s
@@ -15,8 +15,7 @@ target datalayout = "p:32:32:32"
1515
define ptr @test1(ptr %x) {
1616
; CHECK-LABEL: @test1(
1717
; CHECK-NEXT: entry:
18-
; CHECK-NEXT: [[B1:%.*]] = load i32, ptr [[X:%.*]], align 4
19-
; CHECK-NEXT: [[C:%.*]] = inttoptr i32 [[B1]] to ptr
18+
; CHECK-NEXT: [[C:%.*]] = load ptr, ptr [[X:%.*]], align 4
2019
; CHECK-NEXT: ret ptr [[C]]
2120
;
2221
entry:

llvm/test/Transforms/InstCombine/load-intPtrType.ll

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
; See https://llvm.org/LICENSE.txt for license information.
55
; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
66
;
7-
; (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates
7+
; (c) Copyright 2024-2026 Advanced Micro Devices, Inc. or its affiliates
88

99
; RUN: opt -passes=instcombine -S < %s | FileCheck %s
1010

@@ -26,8 +26,7 @@ entry:
2626
define ptr @load_intPtrType_to_ptr(ptr %x) {
2727
; CHECK-LABEL: @load_intPtrType_to_ptr(
2828
; CHECK-NEXT: entry:
29-
; CHECK-NEXT: [[B1:%.*]] = load i20, ptr [[X:%.*]], align 4
30-
; CHECK-NEXT: [[C:%.*]] = inttoptr i20 [[B1]] to ptr
29+
; CHECK-NEXT: [[C:%.*]] = load ptr, ptr [[X:%.*]], align 4
3130
; CHECK-NEXT: ret ptr [[C]]
3231
;
3332
entry:

0 commit comments

Comments
 (0)