Skip to content

Commit 7de6492

Browse files
authored
[DAG] shouldReduceLoadWidth - hasOneUse should check just the loaded value - not the chain (#128167)
The hasOneUse check was failing in any case where the load was part of a chain. We should only be checking whether the loaded value has one use; any updates to the chain should be handled by the fold calling shouldReduceLoadWidth. I've updated the x86 implementation to match, although it has no effect here yet (I'm still looking at how to improve the x86 implementation), as the inner for loop was discarding chain uses anyway. Using SDValue::hasOneUse instead exposes a missing dependency on the LLVMSelectionDAG library in a lot of tools and unittests, which resulted in having to make SDNode::hasNUsesOfValue inline. Noticed while fighting the x86 regressions in #122671.
1 parent 9cbdcfc commit 7de6492

11 files changed

+231
-406
lines changed

Diff for: llvm/include/llvm/CodeGen/SelectionDAGNodes.h

+15-1
Original file line numberDiff line numberDiff line change
@@ -877,7 +877,21 @@ END_TWO_BYTE_PACK()
877877

878878
/// Return true if there are exactly NUSES uses of the indicated value.
879879
/// This method ignores uses of other values defined by this operation.
880-
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const;
880+
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const {
881+
assert(Value < getNumValues() && "Bad value!");
882+
883+
// TODO: Only iterate over uses of a given value of the node
884+
for (SDUse &U : uses()) {
885+
if (U.getResNo() == Value) {
886+
if (NUses == 0)
887+
return false;
888+
--NUses;
889+
}
890+
}
891+
892+
// Found exactly the right number of uses?
893+
return NUses == 0;
894+
}
881895

882896
/// Return true if there are any use of the indicated value.
883897
/// This method ignores uses of other values defined by this operation.

Diff for: llvm/include/llvm/CodeGen/TargetLowering.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -1817,7 +1817,7 @@ class TargetLoweringBase {
18171817
EVT NewVT) const {
18181818
// By default, assume that it is cheaper to extract a subvector from a wide
18191819
// vector load rather than creating multiple narrow vector loads.
1820-
if (NewVT.isVector() && !Load->hasOneUse())
1820+
if (NewVT.isVector() && !SDValue(Load, 0).hasOneUse())
18211821
return false;
18221822

18231823
return true;

Diff for: llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

-19
Original file line numberDiff line numberDiff line change
@@ -12444,25 +12444,6 @@ const EVT *SDNode::getValueTypeList(MVT VT) {
1244412444
return &SimpleVTArray.VTs[VT.SimpleTy];
1244512445
}
1244612446

12447-
/// hasNUsesOfValue - Return true if there are exactly NUSES uses of the
12448-
/// indicated value. This method ignores uses of other values defined by this
12449-
/// operation.
12450-
bool SDNode::hasNUsesOfValue(unsigned NUses, unsigned Value) const {
12451-
assert(Value < getNumValues() && "Bad value!");
12452-
12453-
// TODO: Only iterate over uses of a given value of the node
12454-
for (SDUse &U : uses()) {
12455-
if (U.getResNo() == Value) {
12456-
if (NUses == 0)
12457-
return false;
12458-
--NUses;
12459-
}
12460-
}
12461-
12462-
// Found exactly the right number of uses?
12463-
return NUses == 0;
12464-
}
12465-
1246612447
/// hasAnyUseOfValue - Return true if there are any use of the indicated
1246712448
/// value. This method ignores uses of other values defined by this operation.
1246812449
bool SDNode::hasAnyUseOfValue(unsigned Value) const {

Diff for: llvm/lib/Target/X86/X86ISelLowering.cpp

+2-1
Original file line numberDiff line numberDiff line change
@@ -3268,7 +3268,8 @@ bool X86TargetLowering::shouldReduceLoadWidth(SDNode *Load,
32683268
// those uses are extracted directly into a store, then the extract + store
32693269
// can be store-folded. Therefore, it's probably not worth splitting the load.
32703270
EVT VT = Load->getValueType(0);
3271-
if ((VT.is256BitVector() || VT.is512BitVector()) && !Load->hasOneUse()) {
3271+
if ((VT.is256BitVector() || VT.is512BitVector()) &&
3272+
!SDValue(Load, 0).hasOneUse()) {
32723273
for (SDUse &Use : Load->uses()) {
32733274
// Skip uses of the chain value. Result 0 of the node is the load value.
32743275
if (Use.getResNo() != 0)

Diff for: llvm/test/CodeGen/AArch64/merge-store.ll

+2-2
Original file line numberDiff line numberDiff line change
@@ -11,14 +11,14 @@ define void @blam() {
1111
; SPLITTING-NEXT: adrp x8, g1
1212
; SPLITTING-NEXT: add x8, x8, :lo12:g1
1313
; SPLITTING-NEXT: adrp x9, g0
14-
; SPLITTING-NEXT: ldr q0, [x9, :lo12:g0]
14+
; SPLITTING-NEXT: ldr d0, [x9, :lo12:g0]
1515
; SPLITTING-NEXT: str d0, [x8]
1616
; SPLITTING-NEXT: ret
1717
;
1818
; MISALIGNED-LABEL: blam:
1919
; MISALIGNED: // %bb.0:
2020
; MISALIGNED-NEXT: adrp x8, g0
21-
; MISALIGNED-NEXT: ldr q0, [x8, :lo12:g0]
21+
; MISALIGNED-NEXT: ldr d0, [x8, :lo12:g0]
2222
; MISALIGNED-NEXT: adrp x8, g1
2323
; MISALIGNED-NEXT: add x8, x8, :lo12:g1
2424
; MISALIGNED-NEXT: str d0, [x8]

Diff for: llvm/test/CodeGen/AArch64/sme-framelower-use-bp.ll

+25-26
Original file line numberDiff line numberDiff line change
@@ -531,18 +531,18 @@ define void @quux() #1 {
531531
; CHECK-NEXT: ldr x18, [x19, #80] // 8-byte Folded Reload
532532
; CHECK-NEXT: ldr x0, [x19, #72] // 8-byte Folded Reload
533533
; CHECK-NEXT: ldr x1, [x19, #64] // 8-byte Folded Reload
534+
; CHECK-NEXT: ldr x15, [x19, #224] // 8-byte Folded Reload
534535
; CHECK-NEXT: ldr x2, [x19, #216] // 8-byte Folded Reload
535536
; CHECK-NEXT: ldr x3, [x19, #120] // 8-byte Folded Reload
536537
; CHECK-NEXT: ldr x4, [x19, #112] // 8-byte Folded Reload
537538
; CHECK-NEXT: ldr x5, [x19, #104] // 8-byte Folded Reload
538539
; CHECK-NEXT: ldr x6, [x19, #96] // 8-byte Folded Reload
539-
; CHECK-NEXT: ldr x7, [x19, #224] // 8-byte Folded Reload
540-
; CHECK-NEXT: ldr x20, [x19, #152] // 8-byte Folded Reload
541-
; CHECK-NEXT: ldr x21, [x19, #144] // 8-byte Folded Reload
542-
; CHECK-NEXT: ldr x22, [x19, #136] // 8-byte Folded Reload
543-
; CHECK-NEXT: ldr x23, [x19, #128] // 8-byte Folded Reload
544-
; CHECK-NEXT: ldr x16, [x19, #200] // 8-byte Folded Reload
545-
; CHECK-NEXT: ldr x15, [x19, #208] // 8-byte Folded Reload
540+
; CHECK-NEXT: ldr x16, [x19, #152] // 8-byte Folded Reload
541+
; CHECK-NEXT: ldr x7, [x19, #144] // 8-byte Folded Reload
542+
; CHECK-NEXT: ldr x20, [x19, #136] // 8-byte Folded Reload
543+
; CHECK-NEXT: ldr x21, [x19, #128] // 8-byte Folded Reload
544+
; CHECK-NEXT: ldr x23, [x19, #200] // 8-byte Folded Reload
545+
; CHECK-NEXT: ldr x22, [x19, #208] // 8-byte Folded Reload
546546
; CHECK-NEXT: ldr x24, [x19, #192] // 8-byte Folded Reload
547547
; CHECK-NEXT: ldr x26, [x19, #176] // 8-byte Folded Reload
548548
; CHECK-NEXT: ldr x25, [x19, #184] // 8-byte Folded Reload
@@ -562,36 +562,34 @@ define void @quux() #1 {
562562
; CHECK-NEXT: add x25, x25, x27, lsl #2
563563
; CHECK-NEXT: str x25, [x26]
564564
; CHECK-NEXT: ldr p0, [x24]
565-
; CHECK-NEXT: ldr x24, [x16]
565+
; CHECK-NEXT: ldr x24, [x23]
566566
; CHECK-NEXT: mov p8.b, p0.b
567567
; CHECK-NEXT: ld1w { z16.s, z24.s }, pn8/z, [x24]
568568
; CHECK-NEXT: mov z0.d, z16.d
569569
; CHECK-NEXT: mov z1.d, z24.d
570570
; CHECK-NEXT: st1w { z1.s }, p2, [x13, #1, mul vl]
571571
; CHECK-NEXT: st1w { z0.s }, p2, [x13]
572-
; CHECK-NEXT: ldr x24, [x15]
573-
; CHECK-NEXT: ldr x15, [x16]
574-
; CHECK-NEXT: add x15, x15, x24, lsl #2
575-
; CHECK-NEXT: str x15, [x16]
576-
; CHECK-NEXT: mov x16, x2
577-
; CHECK-NEXT: incd x16
572+
; CHECK-NEXT: ldr x24, [x22]
573+
; CHECK-NEXT: ldr x22, [x23]
574+
; CHECK-NEXT: add x22, x22, x24, lsl #2
575+
; CHECK-NEXT: str x22, [x23]
578576
; CHECK-NEXT: ldr p1, [x2]
579-
; CHECK-NEXT: mov x15, x7
580-
; CHECK-NEXT: incd x15
581-
; CHECK-NEXT: ldr p0, [x7]
577+
; CHECK-NEXT: ldr p0, [x15]
582578
; CHECK-NEXT: ld1w { z1.s }, p2/z, [x14]
583579
; CHECK-NEXT: ld1w { z0.s }, p2/z, [x13]
584-
; CHECK-NEXT: str p1, [x23]
585-
; CHECK-NEXT: str p0, [x22]
586-
; CHECK-NEXT: st1w { z1.s }, p2, [x21]
587-
; CHECK-NEXT: st1w { z0.s }, p2, [x20]
588-
; CHECK-NEXT: ldr p0, [x23]
589-
; CHECK-NEXT: ldr p1, [x22]
590-
; CHECK-NEXT: ld1w { z0.s }, p2/z, [x21]
591-
; CHECK-NEXT: ld1w { z1.s }, p2/z, [x20]
580+
; CHECK-NEXT: str p1, [x21]
581+
; CHECK-NEXT: str p0, [x20]
582+
; CHECK-NEXT: st1w { z1.s }, p2, [x7]
583+
; CHECK-NEXT: st1w { z0.s }, p2, [x16]
584+
; CHECK-NEXT: ldr p0, [x21]
585+
; CHECK-NEXT: ldr p1, [x20]
586+
; CHECK-NEXT: ld1w { z0.s }, p2/z, [x7]
587+
; CHECK-NEXT: ld1w { z1.s }, p2/z, [x16]
592588
; CHECK-NEXT: fmopa za0.s, p0/m, p1/m, z0.s, z1.s
589+
; CHECK-NEXT: mov x16, x2
590+
; CHECK-NEXT: incd x16
593591
; CHECK-NEXT: ldr p1, [x16]
594-
; CHECK-NEXT: ldr p0, [x7]
592+
; CHECK-NEXT: ldr p0, [x15]
595593
; CHECK-NEXT: ld1w { z1.s }, p2/z, [x14, #1, mul vl]
596594
; CHECK-NEXT: ld1w { z0.s }, p2/z, [x13]
597595
; CHECK-NEXT: str p1, [x6]
@@ -604,6 +602,7 @@ define void @quux() #1 {
604602
; CHECK-NEXT: ld1w { z1.s }, p2/z, [x3]
605603
; CHECK-NEXT: fmopa za1.s, p0/m, p1/m, z0.s, z1.s
606604
; CHECK-NEXT: ldr p1, [x2]
605+
; CHECK-NEXT: incd x15
607606
; CHECK-NEXT: ldr p0, [x15]
608607
; CHECK-NEXT: ld1w { z1.s }, p2/z, [x14]
609608
; CHECK-NEXT: ld1w { z0.s }, p2/z, [x13, #1, mul vl]

0 commit comments

Comments
 (0)