-
Notifications
You must be signed in to change notification settings - Fork 13.3k
release/20.x: [HEXAGON] Fix corner cases for hwloops pass (#135439) #135657
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
llvmbot
wants to merge
1
commit into
llvm:release/20.x
Choose a base branch
from
llvmbot:issue133241
base: release/20.x
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
+325
−3
Conversation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Add a check to the Hexagon hardware loops pass to make sure that Dist > 0 or Dist < 0 holds for the corresponding comparison cases. The change modifies the HexagonHardwareLoops pass to add runtime checks that end_value > initial_value for less-than comparisons and end_value < initial_value for greater-than comparisons. Fix for llvm#133241. @androm3da @iajbar PTAL --------- Co-authored-by: aankit-quic <[email protected]> (cherry picked from commit da8ce56)
@iajbar What do you think about merging this PR to the release branch?
@llvm/pr-subscribers-backend-hexagon Author: None (llvmbot). Changes: Backport da8ce56. Requested by: @androm3da. Full diff: https://github.com/llvm/llvm-project/pull/135657.diff (3 files affected):
diff --git a/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp
index 9334746349240..dd4b240455126 100644
--- a/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp
@@ -731,6 +731,11 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
Register IVReg,
int64_t IVBump,
Comparison::Kind Cmp) const {
+ LLVM_DEBUG(llvm::dbgs() << "Loop: " << *Loop << "\n");
+ LLVM_DEBUG(llvm::dbgs() << "Initial Value: " << *Start << "\n");
+ LLVM_DEBUG(llvm::dbgs() << "End Value: " << *End << "\n");
+ LLVM_DEBUG(llvm::dbgs() << "Inc/Dec Value: " << IVBump << "\n");
+ LLVM_DEBUG(llvm::dbgs() << "Comparison: " << Cmp << "\n");
// Cannot handle comparison EQ, i.e. while (A == B).
if (Cmp == Comparison::EQ)
return nullptr;
@@ -846,6 +851,7 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
if (IVBump < 0) {
std::swap(Start, End);
IVBump = -IVBump;
+ std::swap(CmpLess, CmpGreater);
}
// Cmp may now have a wrong direction, e.g. LEs may now be GEs.
// Signedness, and "including equality" are preserved.
@@ -989,7 +995,45 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
CountSR = 0;
}
- return new CountValue(CountValue::CV_Register, CountR, CountSR);
+ const TargetRegisterClass *PredRC = &Hexagon::PredRegsRegClass;
+ Register MuxR = CountR;
+ unsigned MuxSR = CountSR;
+ // For the loop count to be valid unsigned number, CmpLess should imply
+ // Dist >= 0. Similarly, CmpGreater should imply Dist < 0. We can skip the
+ // check if the initial distance is zero and the comparison is LTu || LTEu.
+ if (!(Start->isImm() && StartV == 0 && Comparison::isUnsigned(Cmp) &&
+ CmpLess) &&
+ (CmpLess || CmpGreater)) {
+ // Generate:
+ // DistCheck = CMP_GT DistR, 0 --> CmpLess
+ // DistCheck = CMP_GT DistR, -1 --> CmpGreater
+ Register DistCheckR = MRI->createVirtualRegister(PredRC);
+ const MCInstrDesc &DistCheckD = TII->get(Hexagon::C2_cmpgti);
+ BuildMI(*PH, InsertPos, DL, DistCheckD, DistCheckR)
+ .addReg(DistR, 0, DistSR)
+ .addImm((CmpLess) ? 0 : -1);
+
+ // Generate:
+ // MUXR = MUX DistCheck, CountR, 1 --> CmpLess
+ // MUXR = MUX DistCheck, 1, CountR --> CmpGreater
+ MuxR = MRI->createVirtualRegister(IntRC);
+ if (CmpLess) {
+ const MCInstrDesc &MuxD = TII->get(Hexagon::C2_muxir);
+ BuildMI(*PH, InsertPos, DL, MuxD, MuxR)
+ .addReg(DistCheckR)
+ .addReg(CountR, 0, CountSR)
+ .addImm(1);
+ } else {
+ const MCInstrDesc &MuxD = TII->get(Hexagon::C2_muxri);
+ BuildMI(*PH, InsertPos, DL, MuxD, MuxR)
+ .addReg(DistCheckR)
+ .addImm(1)
+ .addReg(CountR, 0, CountSR);
+ }
+ MuxSR = 0;
+ }
+
+ return new CountValue(CountValue::CV_Register, MuxR, MuxSR);
}
/// Return true if the operation is invalid within hardware loop.
diff --git a/llvm/test/CodeGen/Hexagon/hwloop-dist-check.mir b/llvm/test/CodeGen/Hexagon/hwloop-dist-check.mir
new file mode 100644
index 0000000000000..9f8c14a314309
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/hwloop-dist-check.mir
@@ -0,0 +1,277 @@
+# RUN: llc --mtriple=hexagon -run-pass=hwloops %s -o - | FileCheck %s
+
+# CHECK-LABEL: name: f
+# CHECK: [[R1:%[0-9]+]]:predregs = C2_cmpgti [[R0:%[0-9]+]], 0
+# CHECK: [[R3:%[0-9]+]]:intregs = C2_muxir [[R1:%[0-9]+]], [[R2:%[0-9]+]], 1
+# CHECK-LABEL: name: g
+# CHECK: [[R1:%[0-9]+]]:predregs = C2_cmpgti [[R0:%[0-9]+]], 0
+# CHECK: [[R3:%[0-9]+]]:intregs = C2_muxir [[R1:%[0-9]+]], [[R2:%[0-9]+]], 1
+--- |
+ @a = dso_local global [255 x ptr] zeroinitializer, align 8
+
+ ; Function Attrs: minsize nofree norecurse nosync nounwind optsize memory(write, argmem: none, inaccessiblemem: none)
+ define dso_local void @f(i32 noundef %m) local_unnamed_addr #0 {
+ entry:
+ %cond = tail call i32 @llvm.smax.i32(i32 %m, i32 2)
+ %0 = add nsw i32 %cond, -4
+ %1 = shl i32 %cond, 3
+ %cgep = getelementptr i8, ptr @a, i32 %1
+ %cgep36 = bitcast ptr @a to ptr
+ br label %do.body
+
+ do.body: ; preds = %do.body, %entry
+ %lsr.iv1 = phi ptr [ %cgep4, %do.body ], [ %cgep, %entry ]
+ %lsr.iv = phi i32 [ %lsr.iv.next, %do.body ], [ %0, %entry ]
+ %sh.0 = phi i32 [ 256, %entry ], [ %shr, %do.body ]
+ %shr = lshr i32 %sh.0, 1
+ %cgep5 = getelementptr inbounds [255 x ptr], ptr %cgep36, i32 0, i32 %shr
+ store ptr %lsr.iv1, ptr %cgep5, align 4, !tbaa !5
+ %lsr.iv.next = add nsw i32 %lsr.iv, 4
+ %cmp1 = icmp samesign ult i32 %lsr.iv.next, 1073741836
+ %cgep4 = getelementptr i8, ptr %lsr.iv1, i32 32
+ br i1 %cmp1, label %do.body, label %do.end, !llvm.loop !9
+
+ do.end: ; preds = %do.body
+ ret void
+ }
+
+ ; Function Attrs: minsize nofree norecurse nosync nounwind optsize memory(write, argmem: none, inaccessiblemem: none)
+ define dso_local void @g(i32 noundef %m) local_unnamed_addr #0 {
+ entry:
+ %0 = add i32 %m, -4
+ %1 = shl i32 %m, 3
+ %cgep = getelementptr i8, ptr @a, i32 %1
+ %cgep36 = bitcast ptr @a to ptr
+ br label %do.body
+
+ do.body: ; preds = %do.body, %entry
+ %lsr.iv1 = phi ptr [ %cgep4, %do.body ], [ %cgep, %entry ]
+ %lsr.iv = phi i32 [ %lsr.iv.next, %do.body ], [ %0, %entry ]
+ %sh.0 = phi i32 [ 256, %entry ], [ %shr, %do.body ]
+ %shr = lshr i32 %sh.0, 1
+ %cgep5 = getelementptr inbounds [255 x ptr], ptr %cgep36, i32 0, i32 %shr
+ store ptr %lsr.iv1, ptr %cgep5, align 4, !tbaa !5
+ %lsr.iv.next = add i32 %lsr.iv, 4
+ %cmp = icmp slt i32 %lsr.iv.next, 1073741836
+ %cgep4 = getelementptr i8, ptr %lsr.iv1, i32 32
+ br i1 %cmp, label %do.body, label %do.end, !llvm.loop !11
+
+ do.end: ; preds = %do.body
+ ret void
+ }
+
+ ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+ declare i32 @llvm.smax.i32(i32, i32) #1
+
+ !llvm.module.flags = !{!0, !1, !2, !3}
+ !0 = !{i32 1, !"wchar_size", i32 4}
+ !1 = !{i32 8, !"PIC Level", i32 2}
+ !2 = !{i32 7, !"PIE Level", i32 2}
+ !3 = !{i32 7, !"frame-pointer", i32 2}
+ !5 = !{!6, !6, i64 0}
+ !6 = !{!"any pointer", !7, i64 0}
+ !7 = !{!"omnipotent char", !8, i64 0}
+ !8 = !{!"Simple C/C++ TBAA"}
+ !9 = distinct !{!9, !10}
+ !10 = !{!"llvm.loop.mustprogress"}
+ !11 = distinct !{!11, !10}
+
+...
+---
+name: f
+alignment: 4
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+failedISel: false
+tracksRegLiveness: true
+hasWinCFI: false
+noPhis: false
+isSSA: true
+noVRegs: false
+hasFakeUses: false
+callsEHReturn: false
+callsUnwindInit: false
+hasEHScopes: false
+hasEHFunclets: false
+isOutlined: false
+debugInstrRef: false
+failsVerification: false
+tracksDebugUserValues: false
+registers:
+ - { id: 0, class: intregs, preferred-register: '', flags: [ ] }
+ - { id: 1, class: intregs, preferred-register: '', flags: [ ] }
+ - { id: 2, class: intregs, preferred-register: '', flags: [ ] }
+ - { id: 3, class: intregs, preferred-register: '', flags: [ ] }
+ - { id: 4, class: intregs, preferred-register: '', flags: [ ] }
+ - { id: 5, class: intregs, preferred-register: '', flags: [ ] }
+ - { id: 6, class: intregs, preferred-register: '', flags: [ ] }
+ - { id: 7, class: intregs, preferred-register: '', flags: [ ] }
+ - { id: 8, class: intregs, preferred-register: '', flags: [ ] }
+ - { id: 9, class: intregs, preferred-register: '', flags: [ ] }
+ - { id: 10, class: intregs, preferred-register: '', flags: [ ] }
+ - { id: 11, class: intregs, preferred-register: '', flags: [ ] }
+ - { id: 12, class: intregs, preferred-register: '', flags: [ ] }
+ - { id: 13, class: predregs, preferred-register: '', flags: [ ] }
+ - { id: 14, class: predregs, preferred-register: '', flags: [ ] }
+ - { id: 15, class: intregs, preferred-register: '', flags: [ ] }
+liveins:
+ - { reg: '$r0', virtual-reg: '%9' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 1
+ adjustsStack: false
+ hasCalls: false
+ stackProtector: ''
+ functionContext: ''
+ maxCallFrameSize: 4294967295
+ cvBytesOfCalleeSavedRegisters: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+ hasTailCall: false
+ isCalleeSavedInfoValid: false
+ localFrameSize: 0
+ savePoint: ''
+ restorePoint: ''
+fixedStack: []
+stack: []
+entry_values: []
+callSites: []
+debugValueSubstitutions: []
+constants: []
+machineFunctionInfo: {}
+body: |
+ bb.0.entry:
+ successors: %bb.1(0x80000000)
+ liveins: $r0
+
+ %9:intregs = COPY $r0
+ %11:intregs = A2_tfrsi 2
+ %12:intregs = A2_max %9, %11
+ %0:intregs = nsw A2_addi %12, -4
+ %1:intregs = S4_addi_asl_ri @a, %12, 3
+ %2:intregs = A2_tfrsi @a
+ %10:intregs = A2_tfrsi 256
+
+ bb.1.do.body:
+ successors: %bb.1(0x7c000000), %bb.2(0x04000000)
+
+ %3:intregs = PHI %1, %bb.0, %8, %bb.1
+ %4:intregs = PHI %0, %bb.0, %7, %bb.1
+ %5:intregs = PHI %10, %bb.0, %15, %bb.1
+ %15:intregs = S2_extractu %5, 8, 1
+ S4_storeri_rr %2, %15, 2, %3 :: (store (s32) into %ir.cgep5, !tbaa !5)
+ %7:intregs = nsw A2_addi %4, 4
+ %13:predregs = C2_cmpgtui %7, 1073741835
+ %8:intregs = A2_addi %3, 32
+ J2_jumpf %13, %bb.1, implicit-def dead $pc
+ J2_jump %bb.2, implicit-def dead $pc
+
+ bb.2.do.end:
+ PS_jmpret $r31, implicit-def dead $pc
+
+...
+---
+name: g
+alignment: 4
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+failedISel: false
+tracksRegLiveness: true
+hasWinCFI: false
+noPhis: false
+isSSA: true
+noVRegs: false
+hasFakeUses: false
+callsEHReturn: false
+callsUnwindInit: false
+hasEHScopes: false
+hasEHFunclets: false
+isOutlined: false
+debugInstrRef: false
+failsVerification: false
+tracksDebugUserValues: false
+registers:
+ - { id: 0, class: intregs, preferred-register: '', flags: [ ] }
+ - { id: 1, class: intregs, preferred-register: '', flags: [ ] }
+ - { id: 2, class: intregs, preferred-register: '', flags: [ ] }
+ - { id: 3, class: intregs, preferred-register: '', flags: [ ] }
+ - { id: 4, class: intregs, preferred-register: '', flags: [ ] }
+ - { id: 5, class: intregs, preferred-register: '', flags: [ ] }
+ - { id: 6, class: intregs, preferred-register: '', flags: [ ] }
+ - { id: 7, class: intregs, preferred-register: '', flags: [ ] }
+ - { id: 8, class: intregs, preferred-register: '', flags: [ ] }
+ - { id: 9, class: intregs, preferred-register: '', flags: [ ] }
+ - { id: 10, class: intregs, preferred-register: '', flags: [ ] }
+ - { id: 11, class: predregs, preferred-register: '', flags: [ ] }
+ - { id: 12, class: predregs, preferred-register: '', flags: [ ] }
+ - { id: 13, class: intregs, preferred-register: '', flags: [ ] }
+liveins:
+ - { reg: '$r0', virtual-reg: '%9' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 1
+ adjustsStack: false
+ hasCalls: false
+ stackProtector: ''
+ functionContext: ''
+ maxCallFrameSize: 4294967295
+ cvBytesOfCalleeSavedRegisters: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+ hasTailCall: false
+ isCalleeSavedInfoValid: false
+ localFrameSize: 0
+ savePoint: ''
+ restorePoint: ''
+fixedStack: []
+stack: []
+entry_values: []
+callSites: []
+debugValueSubstitutions: []
+constants: []
+machineFunctionInfo: {}
+body: |
+ bb.0.entry:
+ successors: %bb.1(0x80000000)
+ liveins: $r0
+
+ %9:intregs = COPY $r0
+ %0:intregs = A2_addi %9, -4
+ %1:intregs = S4_addi_asl_ri @a, %9, 3
+ %2:intregs = A2_tfrsi @a
+ %10:intregs = A2_tfrsi 256
+
+ bb.1.do.body:
+ successors: %bb.1(0x7c000000), %bb.2(0x04000000)
+
+ %3:intregs = PHI %1, %bb.0, %8, %bb.1
+ %4:intregs = PHI %0, %bb.0, %7, %bb.1
+ %5:intregs = PHI %10, %bb.0, %13, %bb.1
+ %13:intregs = S2_extractu %5, 8, 1
+ S4_storeri_rr %2, %13, 2, %3 :: (store (s32) into %ir.cgep5, !tbaa !5)
+ %7:intregs = A2_addi %4, 4
+ %11:predregs = C2_cmpgti %7, 1073741835
+ %8:intregs = A2_addi %3, 32
+ J2_jumpf %11, %bb.1, implicit-def dead $pc
+ J2_jump %bb.2, implicit-def dead $pc
+
+ bb.2.do.end:
+ PS_jmpret $r31, implicit-def dead $pc
+
+...
diff --git a/llvm/test/CodeGen/Hexagon/swp-phi-start.ll b/llvm/test/CodeGen/Hexagon/swp-phi-start.ll
index 52c258656ec22..6c2b08d83b1c7 100644
--- a/llvm/test/CodeGen/Hexagon/swp-phi-start.ll
+++ b/llvm/test/CodeGen/Hexagon/swp-phi-start.ll
@@ -5,8 +5,9 @@
; the same stage.
; CHECK-DAG: [[REG3:(r[0-9]+)]] = add([[REG1:(r[0-9]+)]],#-1)
-; CHECK-DAG: [[REG2:(r[0-9]+)]] = add([[REG1]],#-1)
-; CHECK-DAG: loop0(.LBB0_[[LOOP:.]],[[REG3]])
+; CHECK-DAG: [[REG2:(r[0-9]+)]] = add([[REG4:(r[0-9]+)]],#-1)
+; CHECK-DAG: loop0(.LBB0_[[LOOP:.]],[[REG2]])
+; CHECK-NOT: = [[REG3]]
; CHECK-NOT: = [[REG2]]
; CHECK: .LBB0_[[LOOP]]:
; CHECK: }{{[ \t]*}}:endloop
|
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Backport da8ce56
Requested by: @androm3da