DAG: Handle load in SimplifyDemandedVectorElts #122671

Open · wants to merge 1 commit into base: main
31 changes: 31 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -3479,6 +3479,37 @@ bool TargetLowering::SimplifyDemandedVectorElts(

    break;
  }
  case ISD::LOAD: {
    auto *Ld = cast<LoadSDNode>(Op);
    if (!ISD::isNormalLoad(Ld) || !Ld->isSimple())
      break;

    // If only a single element of the loaded vector is demanded, narrow the
    // vector load to a scalar load of just that element.
    // TODO: Handle arbitrary vector extract for isMask
    if (DemandedElts.popcount() != 1)
      break;

    EVT VT = Ld->getValueType(0);
    if (TLO.LegalOperations() &&
        !isOperationLegalOrCustom(ISD::INSERT_VECTOR_ELT, VT))
      break;

    EVT EltVT = VT.getVectorElementType();
    SDLoc DL(Ld);

    unsigned Idx = DemandedElts.countTrailingZeros();

    SDValue IdxVal = TLO.DAG.getVectorIdxConstant(Idx, DL);
    SDValue Scalarized =
        scalarizeExtractedVectorLoad(EltVT, DL, VT, IdxVal, Ld, TLO.DAG);
    if (!Scalarized)
      break;

    // Reroute users of the original load's chain result to the narrowed
    // load's chain so the memory dependencies are preserved.
    TLO.DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Scalarized.getValue(1));

    // Rebuild a value of the original vector type by inserting the loaded
    // scalar into undef at the one demanded lane.
    SDValue Insert = TLO.DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
                                     TLO.DAG.getUNDEF(VT), Scalarized, IdxVal);
    return TLO.CombineTo(Op, Insert);
  }
  case ISD::VECTOR_SHUFFLE: {
    SDValue LHS = Op.getOperand(0);
    SDValue RHS = Op.getOperand(1);
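For reference, here is a minimal IR sketch of the pattern the new ISD::LOAD case targets (illustrative only, not part of the patch; the function name is invented):

; Only lane 1 of the loaded vector is demanded, so the combine can narrow the
; <2 x i64> load to a scalar load from offset 8; on AArch64 the expected
; codegen is a single "ldr x0, [x0, #8]" rather than a q-register load plus a
; lane move.
define i64 @demand_one_lane(ptr %p) {
  %vec = load <2 x i64>, ptr %p, align 8
  %elt = extractelement <2 x i64> %vec, i32 1
  ret i64 %elt
}

The INSERT_VECTOR_ELT legality check exists because the narrowed scalar load is re-wrapped into a vector by insertion into undef; once operations are legal, that replacement node must itself be legal or custom for the vector type.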
54 changes: 27 additions & 27 deletions llvm/test/CodeGen/AArch64/arm64-big-endian-bitconverts.ll
@@ -30,7 +30,7 @@ define void @test_i64_v2f32(ptr %p, ptr %q) {
; CHECK: ld1 { v{{[0-9]+}}.2s }
; CHECK: rev64 v{{[0-9]+}}.2s
; CHECK: str
%1 = load <2 x float>, ptr %p
%1 = load volatile <2 x float>, ptr %p
%2 = fadd <2 x float> %1, %1
%3 = bitcast <2 x float> %2 to i64
%4 = add i64 %3, %3
@@ -43,7 +43,7 @@ define void @test_i64_v2i32(ptr %p, ptr %q) {
; CHECK: ld1 { v{{[0-9]+}}.2s }
; CHECK: rev64 v{{[0-9]+}}.2s
; CHECK: str
%1 = load <2 x i32>, ptr %p
%1 = load volatile <2 x i32>, ptr %p
%2 = add <2 x i32> %1, %1
%3 = bitcast <2 x i32> %2 to i64
%4 = add i64 %3, %3
@@ -121,7 +121,7 @@ define void @test_f64_v2f32(ptr %p, ptr %q) {
; CHECK: ld1 { v{{[0-9]+}}.2s }
; CHECK: rev64 v{{[0-9]+}}.2s
; CHECK: str
%1 = load <2 x float>, ptr %p
%1 = load volatile <2 x float>, ptr %p
%2 = fadd <2 x float> %1, %1
%3 = bitcast <2 x float> %2 to double
%4 = fadd double %3, %3
@@ -134,7 +134,7 @@ define void @test_f64_v2i32(ptr %p, ptr %q) {
; CHECK: ld1 { v{{[0-9]+}}.2s }
; CHECK: rev64 v{{[0-9]+}}.2s
; CHECK: str
%1 = load <2 x i32>, ptr %p
%1 = load volatile <2 x i32>, ptr %p
%2 = add <2 x i32> %1, %1
%3 = bitcast <2 x i32> %2 to double
%4 = fadd double %3, %3
@@ -213,7 +213,7 @@ define void @test_v1i64_v2f32(ptr %p, ptr %q) {
; CHECK: ld1 { v{{[0-9]+}}.2s }
; CHECK: rev64 v{{[0-9]+}}.2s
; CHECK: str
%1 = load <2 x float>, ptr %p
%1 = load volatile <2 x float>, ptr %p
%2 = fadd <2 x float> %1, %1
%3 = bitcast <2 x float> %2 to <1 x i64>
%4 = add <1 x i64> %3, %3
@@ -226,7 +226,7 @@ define void @test_v1i64_v2i32(ptr %p, ptr %q) {
; CHECK: ld1 { v{{[0-9]+}}.2s }
; CHECK: rev64 v{{[0-9]+}}.2s
; CHECK: str
%1 = load <2 x i32>, ptr %p
%1 = load volatile <2 x i32>, ptr %p
%2 = add <2 x i32> %1, %1
%3 = bitcast <2 x i32> %2 to <1 x i64>
%4 = add <1 x i64> %3, %3
@@ -318,7 +318,7 @@ define void @test_v2f32_v1i64(ptr %p, ptr %q) {
define void @test_v2f32_v2i32(ptr %p, ptr %q) {
; CHECK: ld1 { v{{[0-9]+}}.2s }
; CHECK: st1 { v{{[0-9]+}}.2s }
%1 = load <2 x i32>, ptr %p
%1 = load volatile <2 x i32>, ptr %p
%2 = add <2 x i32> %1, %1
%3 = bitcast <2 x i32> %2 to <2 x float>
%4 = fadd <2 x float> %3, %3
@@ -410,7 +410,7 @@ define void @test_v2i32_v1i64(ptr %p, ptr %q) {
define void @test_v2i32_v2f32(ptr %p, ptr %q) {
; CHECK: ld1 { v{{[0-9]+}}.2s }
; CHECK: st1 { v{{[0-9]+}}.2s }
%1 = load <2 x float>, ptr %p
%1 = load volatile <2 x float>, ptr %p
%2 = fadd <2 x float> %1, %1
%3 = bitcast <2 x float> %2 to <2 x i32>
%4 = add <2 x i32> %3, %3
@@ -488,7 +488,7 @@ define void @test_v4i16_v2f32(ptr %p, ptr %q) {
; CHECK: ld1 { v{{[0-9]+}}.2s }
; CHECK: rev32 v{{[0-9]+}}.4h
; CHECK: st1 { v{{[0-9]+}}.4h }
%1 = load <2 x float>, ptr %p
%1 = load volatile <2 x float>, ptr %p
%2 = fadd <2 x float> %1, %1
%3 = bitcast <2 x float> %2 to <4 x i16>
%4 = add <4 x i16> %3, %3
@@ -501,7 +501,7 @@ define void @test_v4i16_v2i32(ptr %p, ptr %q) {
; CHECK: ld1 { v{{[0-9]+}}.2s }
; CHECK: rev32 v{{[0-9]+}}.4h
; CHECK: st1 { v{{[0-9]+}}.4h }
%1 = load <2 x i32>, ptr %p
%1 = load volatile <2 x i32>, ptr %p
%2 = add <2 x i32> %1, %1
%3 = bitcast <2 x i32> %2 to <4 x i16>
%4 = add <4 x i16> %3, %3
@@ -587,7 +587,7 @@ define void @test_v4f16_v2f32(ptr %p, ptr %q) {
; CHECK: fadd
; CHECK-NOT: rev
; CHECK: st1 { v{{[0-9]+}}.4h }
%1 = load <2 x float>, ptr %p
%1 = load volatile <2 x float>, ptr %p
%2 = fadd <2 x float> %1, %1
%3 = bitcast <2 x float> %2 to <4 x half>
%4 = fadd <4 x half> %3, %3
@@ -602,7 +602,7 @@ define void @test_v4f16_v2i32(ptr %p, ptr %q) {
; CHECK: fadd
; CHECK-NOT: rev
; CHECK: st1 { v{{[0-9]+}}.4h }
%1 = load <2 x i32>, ptr %p
%1 = load volatile <2 x i32>, ptr %p
%2 = add <2 x i32> %1, %1
%3 = bitcast <2 x i32> %2 to <4 x half>
%4 = fadd <4 x half> %3, %3
@@ -682,7 +682,7 @@ define void @test_v8i8_v2f32(ptr %p, ptr %q) {
; CHECK: ld1 { v{{[0-9]+}}.2s }
; CHECK: rev32 v{{[0-9]+}}.8b
; CHECK: st1 { v{{[0-9]+}}.8b }
%1 = load <2 x float>, ptr %p
%1 = load volatile <2 x float>, ptr %p
%2 = fadd <2 x float> %1, %1
%3 = bitcast <2 x float> %2 to <8 x i8>
%4 = add <8 x i8> %3, %3
@@ -695,7 +695,7 @@ define void @test_v8i8_v2i32(ptr %p, ptr %q) {
; CHECK: ld1 { v{{[0-9]+}}.2s }
; CHECK: rev32 v{{[0-9]+}}.8b
; CHECK: st1 { v{{[0-9]+}}.8b }
%1 = load <2 x i32>, ptr %p
%1 = load volatile <2 x i32>, ptr %p
%2 = add <2 x i32> %1, %1
%3 = bitcast <2 x i32> %2 to <8 x i8>
%4 = add <8 x i8> %3, %3
@@ -721,7 +721,7 @@ define void @test_f128_v2f64(ptr %p, ptr %q) {
; CHECK: ld1 { v{{[0-9]+}}.2d }
; CHECK: ext
; CHECK: str
%1 = load <2 x double>, ptr %p
%1 = load volatile <2 x double>, ptr %p
%2 = fadd <2 x double> %1, %1
%3 = bitcast <2 x double> %2 to fp128
%4 = fadd fp128 %3, %3
@@ -734,7 +734,7 @@ define void @test_f128_v2i64(ptr %p, ptr %q) {
; CHECK: ld1 { v{{[0-9]+}}.2d }
; CHECK: ext
; CHECK: str
%1 = load <2 x i64>, ptr %p
%1 = load volatile <2 x i64>, ptr %p
%2 = add <2 x i64> %1, %1
%3 = bitcast <2 x i64> %2 to fp128
%4 = fadd fp128 %3, %3
@@ -816,7 +816,7 @@ define void @test_v2f64_f128(ptr %p, ptr %q) {
define void @test_v2f64_v2i64(ptr %p, ptr %q) {
; CHECK: ld1 { v{{[0-9]+}}.2d }
; CHECK: st1 { v{{[0-9]+}}.2d }
%1 = load <2 x i64>, ptr %p
%1 = load volatile <2 x i64>, ptr %p
%2 = add <2 x i64> %1, %1
%3 = bitcast <2 x i64> %2 to <2 x double>
%4 = fadd <2 x double> %3, %3
@@ -895,7 +895,7 @@ define void @test_v2i64_f128(ptr %p, ptr %q) {
define void @test_v2i64_v2f64(ptr %p, ptr %q) {
; CHECK: ld1 { v{{[0-9]+}}.2d }
; CHECK: st1 { v{{[0-9]+}}.2d }
%1 = load <2 x double>, ptr %p
%1 = load volatile <2 x double>, ptr %p
%2 = fadd <2 x double> %1, %1
%3 = bitcast <2 x double> %2 to <2 x i64>
%4 = add <2 x i64> %3, %3
@@ -979,7 +979,7 @@ define void @test_v4f32_v2f64(ptr %p, ptr %q) {
; CHECK: rev64 v{{[0-9]+}}.4s
; CHECK-NOT: rev
; CHECK: st1 { v{{[0-9]+}}.4s }
%1 = load <2 x double>, ptr %p
%1 = load volatile <2 x double>, ptr %p
%2 = fadd <2 x double> %1, %1
%3 = bitcast <2 x double> %2 to <4 x float>
%4 = fadd <4 x float> %3, %3
@@ -994,7 +994,7 @@ define void @test_v4f32_v2i64(ptr %p, ptr %q) {
; CHECK: fadd
; CHECK-NOT: rev
; CHECK: st1 { v{{[0-9]+}}.4s }
%1 = load <2 x i64>, ptr %p
%1 = load volatile <2 x i64>, ptr %p
%2 = add <2 x i64> %1, %1
%3 = bitcast <2 x i64> %2 to <4 x float>
%4 = fadd <4 x float> %3, %3
@@ -1062,7 +1062,7 @@ define void @test_v4i32_v2f64(ptr %p, ptr %q) {
; CHECK: ld1 { v{{[0-9]+}}.2d }
; CHECK: rev64 v{{[0-9]+}}.4s
; CHECK: st1 { v{{[0-9]+}}.4s }
%1 = load <2 x double>, ptr %p
%1 = load volatile <2 x double>, ptr %p
%2 = fadd <2 x double> %1, %1
%3 = bitcast <2 x double> %2 to <4 x i32>
%4 = add <4 x i32> %3, %3
@@ -1075,7 +1075,7 @@ define void @test_v4i32_v2i64(ptr %p, ptr %q) {
; CHECK: ld1 { v{{[0-9]+}}.2d }
; CHECK: rev64 v{{[0-9]+}}.4s
; CHECK: st1 { v{{[0-9]+}}.4s }
%1 = load <2 x i64>, ptr %p
%1 = load volatile <2 x i64>, ptr %p
%2 = add <2 x i64> %1, %1
%3 = bitcast <2 x i64> %2 to <4 x i32>
%4 = add <4 x i32> %3, %3
@@ -1141,7 +1141,7 @@ define void @test_v8i16_v2f64(ptr %p, ptr %q) {
; CHECK: ld1 { v{{[0-9]+}}.2d }
; CHECK: rev64 v{{[0-9]+}}.8h
; CHECK: st1 { v{{[0-9]+}}.8h }
%1 = load <2 x double>, ptr %p
%1 = load volatile <2 x double>, ptr %p
%2 = fadd <2 x double> %1, %1
%3 = bitcast <2 x double> %2 to <8 x i16>
%4 = add <8 x i16> %3, %3
@@ -1154,7 +1154,7 @@ define void @test_v8i16_v2i64(ptr %p, ptr %q) {
; CHECK: ld1 { v{{[0-9]+}}.2d }
; CHECK: rev64 v{{[0-9]+}}.8h
; CHECK: st1 { v{{[0-9]+}}.8h }
%1 = load <2 x i64>, ptr %p
%1 = load volatile <2 x i64>, ptr %p
%2 = add <2 x i64> %1, %1
%3 = bitcast <2 x i64> %2 to <8 x i16>
%4 = add <8 x i16> %3, %3
@@ -1234,7 +1234,7 @@ define void @test_v16i8_v2f64(ptr %p, ptr %q) {
; CHECK: ld1 { v{{[0-9]+}}.2d }
; CHECK: rev64 v{{[0-9]+}}.16b
; CHECK: st1 { v{{[0-9]+}}.16b }
%1 = load <2 x double>, ptr %p
%1 = load volatile <2 x double>, ptr %p
%2 = fadd <2 x double> %1, %1
%3 = bitcast <2 x double> %2 to <16 x i8>
%4 = add <16 x i8> %3, %3
@@ -1247,7 +1247,7 @@ define void @test_v16i8_v2i64(ptr %p, ptr %q) {
; CHECK: ld1 { v{{[0-9]+}}.2d }
; CHECK: rev64 v{{[0-9]+}}.16b
; CHECK: st1 { v{{[0-9]+}}.16b }
%1 = load <2 x i64>, ptr %p
%1 = load volatile <2 x i64>, ptr %p
%2 = add <2 x i64> %1, %1
%3 = bitcast <2 x i64> %2 to <16 x i8>
%4 = add <16 x i8> %3, %3
@@ -1315,7 +1315,7 @@ define %struct.struct1 @test_v4f16_struct(ptr %ret) {
entry:
; CHECK: ld1 { {{v[0-9]+}}.4h }
; CHECK-NOT: rev
%0 = load <4 x half>, ptr %ret, align 2
%0 = load volatile <4 x half>, ptr %ret, align 2
%1 = extractelement <4 x half> %0, i32 0
%.fca.0.insert = insertvalue %struct.struct1 undef, half %1, 0
ret %struct.struct1 %.fca.0.insert
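Note on the updates above: each affected load in arm64-big-endian-bitconverts.ll is made volatile, presumably so the new combine cannot narrow or fold the vector load, keeping the tests exercising the ld1/rev/st1 big-endian lowering their CHECK lines were written for.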
5 changes: 1 addition & 4 deletions llvm/test/CodeGen/AArch64/dag-ReplaceAllUsesOfValuesWith.ll
@@ -27,10 +27,7 @@
define i64 @g(ptr %p) {
; CHECK-LABEL: g:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr x8, [x0, #8]
; CHECK-NEXT: add x9, x8, x8
; CHECK-NEXT: add x8, x9, x8
; CHECK-NEXT: sub x0, x8, x8
; CHECK-NEXT: mov x0, xzr
; CHECK-NEXT: ret
%vec = load <2 x i64>, ptr %p, align 1
%elt = extractelement <2 x i64> %vec, i32 1
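With the load narrowed to its single demanded lane and the chain rerouted through ReplaceAllUsesOfValueWith, the redundant arithmetic in this regression test (an x - x of equal values) now constant-folds away, leaving only the zero move.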
43 changes: 21 additions & 22 deletions llvm/test/CodeGen/AArch64/fcmp.ll
@@ -679,48 +679,47 @@ define <3 x double> @v3f128_double(<3 x fp128> %a, <3 x fp128> %b, <3 x double>
; CHECK-SD-NEXT: .cfi_def_cfa_offset 160
; CHECK-SD-NEXT: .cfi_offset w30, -16
; CHECK-SD-NEXT: stp q2, q5, [sp, #112] // 32-byte Folded Spill
; CHECK-SD-NEXT: add x8, sp, #176
; CHECK-SD-NEXT: // kill: def $d6 killed $d6 def $q6
; CHECK-SD-NEXT: // kill: def $d7 killed $d7 def $q7
; CHECK-SD-NEXT: ldr d5, [sp, #184]
; CHECK-SD-NEXT: str q3, [sp, #64] // 16-byte Folded Spill
; CHECK-SD-NEXT: ldp d3, d2, [sp, #168]
; CHECK-SD-NEXT: str q3, [sp, #32] // 16-byte Folded Spill
; CHECK-SD-NEXT: ldp d3, d2, [sp, #160]
; CHECK-SD-NEXT: mov v6.d[1], v7.d[0]
; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
; CHECK-SD-NEXT: mov v0.16b, v1.16b
; CHECK-SD-NEXT: mov v1.16b, v4.16b
; CHECK-SD-NEXT: str q5, [sp, #96] // 16-byte Folded Spill
; CHECK-SD-NEXT: ldr d5, [sp, #160]
; CHECK-SD-NEXT: mov v3.d[1], v2.d[0]
; CHECK-SD-NEXT: str q5, [sp, #80] // 16-byte Folded Spill
; CHECK-SD-NEXT: stp q6, q3, [sp, #32] // 32-byte Folded Spill
; CHECK-SD-NEXT: ld1 { v2.d }[1], [x8]
; CHECK-SD-NEXT: stp q6, q3, [sp, #80] // 32-byte Folded Spill
; CHECK-SD-NEXT: str q2, [sp, #48] // 16-byte Folded Spill
; CHECK-SD-NEXT: ldr d2, [sp, #184]
; CHECK-SD-NEXT: str q2, [sp, #64] // 16-byte Folded Spill
; CHECK-SD-NEXT: bl __lttf2
; CHECK-SD-NEXT: cmp w0, #0
; CHECK-SD-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload
; CHECK-SD-NEXT: cset w8, lt
; CHECK-SD-NEXT: sbfx x8, x8, #0, #1
; CHECK-SD-NEXT: fmov d0, x8
; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
; CHECK-SD-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload
; CHECK-SD-NEXT: bl __lttf2
; CHECK-SD-NEXT: cmp w0, #0
; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT: cset w8, lt
; CHECK-SD-NEXT: sbfx x8, x8, #0, #1
; CHECK-SD-NEXT: fmov d1, x8
; CHECK-SD-NEXT: mov v1.d[1], v0.d[0]
; CHECK-SD-NEXT: str q1, [sp, #64] // 16-byte Folded Spill
; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill
; CHECK-SD-NEXT: ldp q0, q1, [sp, #112] // 32-byte Folded Reload
; CHECK-SD-NEXT: bl __lttf2
; CHECK-SD-NEXT: ldp q1, q0, [sp, #32] // 32-byte Folded Reload
; CHECK-SD-NEXT: ldp q0, q3, [sp, #80] // 32-byte Folded Reload
; CHECK-SD-NEXT: cmp w0, #0
; CHECK-SD-NEXT: ldp q2, q4, [sp, #64] // 32-byte Folded Reload
; CHECK-SD-NEXT: ldp q2, q1, [sp, #32] // 32-byte Folded Reload
; CHECK-SD-NEXT: cset w8, lt
; CHECK-SD-NEXT: sbfx x8, x8, #0, #1
; CHECK-SD-NEXT: ldr q3, [sp, #96] // 16-byte Folded Reload
; CHECK-SD-NEXT: ldr q4, [sp, #64] // 16-byte Folded Reload
; CHECK-SD-NEXT: ldr x30, [sp, #144] // 8-byte Folded Reload
; CHECK-SD-NEXT: bit v0.16b, v1.16b, v2.16b
; CHECK-SD-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-SD-NEXT: fmov d2, x8
; CHECK-SD-NEXT: bsl v2.16b, v4.16b, v3.16b
; CHECK-SD-NEXT: bsl v2.16b, v3.16b, v4.16b
; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT: // kill: def $d2 killed $d2 killed $q2
@@ -815,20 +814,20 @@ define <3 x double> @v3f64_double(<3 x double> %a, <3 x double> %b, <3 x double>
; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-SD-NEXT: // kill: def $d6 killed $d6 def $q6
; CHECK-SD-NEXT: // kill: def $d7 killed $d7 def $q7
; CHECK-SD-NEXT: add x8, sp, #16
; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
; CHECK-SD-NEXT: // kill: def $d5 killed $d5 def $q5
; CHECK-SD-NEXT: ldr d16, [sp, #24]
; CHECK-SD-NEXT: ldr d17, [sp]
; CHECK-SD-NEXT: mov v3.d[1], v4.d[0]
; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
; CHECK-SD-NEXT: mov v6.d[1], v7.d[0]
; CHECK-SD-NEXT: ldp d1, d4, [sp, #8]
; CHECK-SD-NEXT: fcmgt v2.2d, v5.2d, v2.2d
; CHECK-SD-NEXT: mov v1.d[1], v4.d[0]
; CHECK-SD-NEXT: fcmgt v0.2d, v3.2d, v0.2d
; CHECK-SD-NEXT: bsl v2.16b, v17.16b, v16.16b
; CHECK-SD-NEXT: // kill: def $d2 killed $d2 killed $q2
; CHECK-SD-NEXT: ldp d3, d1, [sp]
; CHECK-SD-NEXT: ld1 { v1.d }[1], [x8]
; CHECK-SD-NEXT: bsl v0.16b, v6.16b, v1.16b
; CHECK-SD-NEXT: ldr d1, [sp, #24]
; CHECK-SD-NEXT: bsl v2.16b, v3.16b, v1.16b
; CHECK-SD-NEXT: // kill: def $d2 killed $d2 killed $q2
; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1
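The fcmp.ll diffs appear to be downstream scheduling and register-allocation churn rather than a functional change: the stack-passed <3 x double> lanes are now picked up with scalar ldr/ldp loads instead of ld1 { v.d }[1] lane inserts (note the dropped add x8, sp, #176 address computations), which in turn shuffles the spill-slot offsets and the bsl/bit/bif operand choices.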
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AArch64/fmlal-loreg.ll
@@ -45,11 +45,11 @@ define void @loop(ptr %out_tile, ptr %lhs_panel, ptr %rhs_panel, i32 noundef %K,
; CHECK-NEXT: mov w8, w3
; CHECK-NEXT: .LBB1_1: // %for.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldr q2, [x1], #2
; CHECK-NEXT: ldr q2, [x2], #2
; CHECK-NEXT: subs x8, x8, #1
; CHECK-NEXT: ldr q3, [x2], #2
; CHECK-NEXT: fmlal v0.4s, v3.4h, v2.h[0]
; CHECK-NEXT: fmlal2 v1.4s, v3.4h, v2.h[0]
; CHECK-NEXT: ld1r { v3.8h }, [x1], #2
; CHECK-NEXT: fmlal v0.4s, v2.4h, v3.4h
; CHECK-NEXT: fmlal2 v1.4s, v2.4h, v3.4h
; CHECK-NEXT: b.ne .LBB1_1
; CHECK-NEXT: // %bb.2: // %for.cond.cleanup
; CHECK-NEXT: stp q0, q1, [x0]
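In the fmlal loop the scalarized lane load is now materialized as a ld1r splat of the half-precision value, so the lane-indexed fmlal/fmlal2 v.4s, v.4h, v.h[0] forms become the plain vector-by-vector fmlal/fmlal2 v.4s, v.4h, v.4h forms, with the post-increment pointers swapped to match.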