Skip to content

[DAG] shouldReduceLoadWidth - hasOneUse should check just the loaded value - not the chain #128167

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 24, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 15 additions & 1 deletion llvm/include/llvm/CodeGen/SelectionDAGNodes.h
Original file line number Diff line number Diff line change
Expand Up @@ -877,7 +877,21 @@ END_TWO_BYTE_PACK()

/// Return true if there are exactly NUSES uses of the indicated value.
/// This method ignores uses of other values defined by this operation.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const;
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const {
  assert(Value < getNumValues() && "Bad value!");

  // TODO: Only iterate over uses of a given value of the node
  unsigned Remaining = NUses;
  for (SDUse &U : uses()) {
    // Uses of other results of this node are not counted.
    if (U.getResNo() != Value)
      continue;
    // One more matching use than requested: the count cannot be exact.
    if (Remaining == 0)
      return false;
    --Remaining;
  }

  // Exact match only if every requested use was accounted for.
  return Remaining == 0;
}

/// Return true if there are any uses of the indicated value.
/// This method ignores uses of other values defined by this operation.
Expand Down
2 changes: 1 addition & 1 deletion llvm/include/llvm/CodeGen/TargetLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -1817,7 +1817,7 @@ class TargetLoweringBase {
EVT NewVT) const {
Copy link
Collaborator

@topperc topperc Feb 22, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not move this definition to TargetLowering.cpp? Do we need the body inlined for some reason?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's an implicit dependency that I'm still trying to track down.

// By default, assume that it is cheaper to extract a subvector from a wide
// vector load rather than creating multiple narrow vector loads.
if (NewVT.isVector() && !Load->hasOneUse())
if (NewVT.isVector() && !SDValue(Load, 0).hasOneUse())
return false;

return true;
Expand Down
19 changes: 0 additions & 19 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12444,25 +12444,6 @@ const EVT *SDNode::getValueTypeList(MVT VT) {
return &SimpleVTArray.VTs[VT.SimpleTy];
}

/// hasNUsesOfValue - Check whether result number \p Value of this node has
/// exactly \p NUses uses. Uses of any other result defined by this node are
/// not counted.
bool SDNode::hasNUsesOfValue(unsigned NUses, unsigned Value) const {
  assert(Value < getNumValues() && "Bad value!");

  // TODO: Only iterate over uses of a given value of the node
  unsigned Remaining = NUses;
  for (SDUse &U : uses()) {
    // Uses of other results of this node are not counted.
    if (U.getResNo() != Value)
      continue;
    // One more matching use than requested: the count cannot be exact.
    if (Remaining == 0)
      return false;
    --Remaining;
  }

  // Exact match only if every requested use was accounted for.
  return Remaining == 0;
}

/// hasAnyUseOfValue - Return true if there are any uses of the indicated
/// value. This method ignores uses of other values defined by this operation.
bool SDNode::hasAnyUseOfValue(unsigned Value) const {
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3268,7 +3268,8 @@ bool X86TargetLowering::shouldReduceLoadWidth(SDNode *Load,
// those uses are extracted directly into a store, then the extract + store
// can be store-folded. Therefore, it's probably not worth splitting the load.
EVT VT = Load->getValueType(0);
if ((VT.is256BitVector() || VT.is512BitVector()) && !Load->hasOneUse()) {
if ((VT.is256BitVector() || VT.is512BitVector()) &&
!SDValue(Load, 0).hasOneUse()) {
for (SDUse &Use : Load->uses()) {
// Skip uses of the chain value. Result 0 of the node is the load value.
if (Use.getResNo() != 0)
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AArch64/merge-store.ll
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,14 @@ define void @blam() {
; SPLITTING-NEXT: adrp x8, g1
; SPLITTING-NEXT: add x8, x8, :lo12:g1
; SPLITTING-NEXT: adrp x9, g0
; SPLITTING-NEXT: ldr q0, [x9, :lo12:g0]
; SPLITTING-NEXT: ldr d0, [x9, :lo12:g0]
; SPLITTING-NEXT: str d0, [x8]
; SPLITTING-NEXT: ret
;
; MISALIGNED-LABEL: blam:
; MISALIGNED: // %bb.0:
; MISALIGNED-NEXT: adrp x8, g0
; MISALIGNED-NEXT: ldr q0, [x8, :lo12:g0]
; MISALIGNED-NEXT: ldr d0, [x8, :lo12:g0]
; MISALIGNED-NEXT: adrp x8, g1
; MISALIGNED-NEXT: add x8, x8, :lo12:g1
; MISALIGNED-NEXT: str d0, [x8]
Expand Down
51 changes: 25 additions & 26 deletions llvm/test/CodeGen/AArch64/sme-framelower-use-bp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -531,18 +531,18 @@ define void @quux() #1 {
; CHECK-NEXT: ldr x18, [x19, #80] // 8-byte Folded Reload
; CHECK-NEXT: ldr x0, [x19, #72] // 8-byte Folded Reload
; CHECK-NEXT: ldr x1, [x19, #64] // 8-byte Folded Reload
; CHECK-NEXT: ldr x15, [x19, #224] // 8-byte Folded Reload
; CHECK-NEXT: ldr x2, [x19, #216] // 8-byte Folded Reload
; CHECK-NEXT: ldr x3, [x19, #120] // 8-byte Folded Reload
; CHECK-NEXT: ldr x4, [x19, #112] // 8-byte Folded Reload
; CHECK-NEXT: ldr x5, [x19, #104] // 8-byte Folded Reload
; CHECK-NEXT: ldr x6, [x19, #96] // 8-byte Folded Reload
; CHECK-NEXT: ldr x7, [x19, #224] // 8-byte Folded Reload
; CHECK-NEXT: ldr x20, [x19, #152] // 8-byte Folded Reload
; CHECK-NEXT: ldr x21, [x19, #144] // 8-byte Folded Reload
; CHECK-NEXT: ldr x22, [x19, #136] // 8-byte Folded Reload
; CHECK-NEXT: ldr x23, [x19, #128] // 8-byte Folded Reload
; CHECK-NEXT: ldr x16, [x19, #200] // 8-byte Folded Reload
; CHECK-NEXT: ldr x15, [x19, #208] // 8-byte Folded Reload
; CHECK-NEXT: ldr x16, [x19, #152] // 8-byte Folded Reload
; CHECK-NEXT: ldr x7, [x19, #144] // 8-byte Folded Reload
; CHECK-NEXT: ldr x20, [x19, #136] // 8-byte Folded Reload
; CHECK-NEXT: ldr x21, [x19, #128] // 8-byte Folded Reload
; CHECK-NEXT: ldr x23, [x19, #200] // 8-byte Folded Reload
; CHECK-NEXT: ldr x22, [x19, #208] // 8-byte Folded Reload
; CHECK-NEXT: ldr x24, [x19, #192] // 8-byte Folded Reload
; CHECK-NEXT: ldr x26, [x19, #176] // 8-byte Folded Reload
; CHECK-NEXT: ldr x25, [x19, #184] // 8-byte Folded Reload
Expand All @@ -562,36 +562,34 @@ define void @quux() #1 {
; CHECK-NEXT: add x25, x25, x27, lsl #2
; CHECK-NEXT: str x25, [x26]
; CHECK-NEXT: ldr p0, [x24]
; CHECK-NEXT: ldr x24, [x16]
; CHECK-NEXT: ldr x24, [x23]
; CHECK-NEXT: mov p8.b, p0.b
; CHECK-NEXT: ld1w { z16.s, z24.s }, pn8/z, [x24]
; CHECK-NEXT: mov z0.d, z16.d
; CHECK-NEXT: mov z1.d, z24.d
; CHECK-NEXT: st1w { z1.s }, p2, [x13, #1, mul vl]
; CHECK-NEXT: st1w { z0.s }, p2, [x13]
; CHECK-NEXT: ldr x24, [x15]
; CHECK-NEXT: ldr x15, [x16]
; CHECK-NEXT: add x15, x15, x24, lsl #2
; CHECK-NEXT: str x15, [x16]
; CHECK-NEXT: mov x16, x2
; CHECK-NEXT: incd x16
; CHECK-NEXT: ldr x24, [x22]
; CHECK-NEXT: ldr x22, [x23]
; CHECK-NEXT: add x22, x22, x24, lsl #2
; CHECK-NEXT: str x22, [x23]
; CHECK-NEXT: ldr p1, [x2]
; CHECK-NEXT: mov x15, x7
; CHECK-NEXT: incd x15
; CHECK-NEXT: ldr p0, [x7]
; CHECK-NEXT: ldr p0, [x15]
; CHECK-NEXT: ld1w { z1.s }, p2/z, [x14]
; CHECK-NEXT: ld1w { z0.s }, p2/z, [x13]
; CHECK-NEXT: str p1, [x23]
; CHECK-NEXT: str p0, [x22]
; CHECK-NEXT: st1w { z1.s }, p2, [x21]
; CHECK-NEXT: st1w { z0.s }, p2, [x20]
; CHECK-NEXT: ldr p0, [x23]
; CHECK-NEXT: ldr p1, [x22]
; CHECK-NEXT: ld1w { z0.s }, p2/z, [x21]
; CHECK-NEXT: ld1w { z1.s }, p2/z, [x20]
; CHECK-NEXT: str p1, [x21]
; CHECK-NEXT: str p0, [x20]
; CHECK-NEXT: st1w { z1.s }, p2, [x7]
; CHECK-NEXT: st1w { z0.s }, p2, [x16]
; CHECK-NEXT: ldr p0, [x21]
; CHECK-NEXT: ldr p1, [x20]
; CHECK-NEXT: ld1w { z0.s }, p2/z, [x7]
; CHECK-NEXT: ld1w { z1.s }, p2/z, [x16]
; CHECK-NEXT: fmopa za0.s, p0/m, p1/m, z0.s, z1.s
; CHECK-NEXT: mov x16, x2
; CHECK-NEXT: incd x16
; CHECK-NEXT: ldr p1, [x16]
; CHECK-NEXT: ldr p0, [x7]
; CHECK-NEXT: ldr p0, [x15]
; CHECK-NEXT: ld1w { z1.s }, p2/z, [x14, #1, mul vl]
; CHECK-NEXT: ld1w { z0.s }, p2/z, [x13]
; CHECK-NEXT: str p1, [x6]
Expand All @@ -604,6 +602,7 @@ define void @quux() #1 {
; CHECK-NEXT: ld1w { z1.s }, p2/z, [x3]
; CHECK-NEXT: fmopa za1.s, p0/m, p1/m, z0.s, z1.s
; CHECK-NEXT: ldr p1, [x2]
; CHECK-NEXT: incd x15
; CHECK-NEXT: ldr p0, [x15]
; CHECK-NEXT: ld1w { z1.s }, p2/z, [x14]
; CHECK-NEXT: ld1w { z0.s }, p2/z, [x13, #1, mul vl]
Expand Down
Loading
Loading