From 9712bc65ca4221d740d88e667cda88bc389da0b5 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Wed, 2 Apr 2025 17:29:02 +0100 Subject: [PATCH 1/3] [DAGCombiner] Don't fold cheap extracts of multiple use splats --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 6 ++++-- .../sve-fixed-length-extract-subvector.ll | 12 +++++++----- .../AArch64/sve-st1-addressing-mode-reg-imm.ll | 6 ++++-- .../CodeGen/RISCV/rvv/insertelt-int-rv64.ll | 18 ++++++------------ 4 files changed, 21 insertions(+), 21 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index dc5c5f38e3bd8..9f0a1ecbe27fa 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -25383,8 +25383,10 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) { // ty1 extract_vector(ty2 splat(V))) -> ty1 splat(V) if (V.getOpcode() == ISD::SPLAT_VECTOR) if (DAG.isConstantValueOfAnyType(V.getOperand(0)) || V.hasOneUse()) - if (!LegalOperations || TLI.isOperationLegal(ISD::SPLAT_VECTOR, NVT)) - return DAG.getSplatVector(NVT, DL, V.getOperand(0)); + if (!TLI.isExtractSubvectorCheap(NVT, V.getValueType(), ExtIdx) || + V.hasOneUse()) + if (!LegalOperations || TLI.isOperationLegal(ISD::SPLAT_VECTOR, NVT)) + return DAG.getSplatVector(NVT, DL, V.getOperand(0)); // extract_subvector(insert_subvector(x,y,c1),c2) // --> extract_subvector(y,c2-c1) diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-extract-subvector.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-extract-subvector.ll index bda7ff9115e09..ae4482d8c5e04 100644 --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-extract-subvector.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-extract-subvector.ll @@ -547,13 +547,15 @@ define void @extract_subvector_legalization_v8i32() vscale_range(2,2) #0 { ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: adrp x8, .LCPI40_0 ; CHECK-NEXT: add x8, x8, :lo12:.LCPI40_0 -; CHECK-NEXT: movi v2.2d, #0000000000000000 +; 
CHECK-NEXT: mov z1.s, #0 // =0x0 ; CHECK-NEXT: ptrue p1.d ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8] -; CHECK-NEXT: mov z1.d, z0.d -; CHECK-NEXT: ext z1.b, z1.b, z0.b, #16 -; CHECK-NEXT: cmeq v0.4s, v0.4s, v2.4s -; CHECK-NEXT: cmeq v1.4s, v1.4s, v2.4s +; CHECK-NEXT: mov z2.d, z1.d +; CHECK-NEXT: mov z3.d, z0.d +; CHECK-NEXT: ext z2.b, z2.b, z1.b, #16 +; CHECK-NEXT: ext z3.b, z3.b, z0.b, #16 +; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s +; CHECK-NEXT: cmeq v1.4s, v3.4s, v2.4s ; CHECK-NEXT: sunpklo z0.d, z0.s ; CHECK-NEXT: sunpklo z1.d, z1.s ; CHECK-NEXT: cmpne p0.d, p1/z, z1.d, #0 diff --git a/llvm/test/CodeGen/AArch64/sve-st1-addressing-mode-reg-imm.ll b/llvm/test/CodeGen/AArch64/sve-st1-addressing-mode-reg-imm.ll index 71b883f0ef7ec..f39292ecefdf3 100644 --- a/llvm/test/CodeGen/AArch64/sve-st1-addressing-mode-reg-imm.ll +++ b/llvm/test/CodeGen/AArch64/sve-st1-addressing-mode-reg-imm.ll @@ -121,8 +121,9 @@ define void @store_nxv6f32(ptr %out) { ; CHECK: // %bb.0: ; CHECK-NEXT: fmov z0.s, #1.00000000 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: st1w { z0.d }, p0, [x0, #2, mul vl] +; CHECK-NEXT: uunpklo z1.d, z0.s ; CHECK-NEXT: str z0, [x0] +; CHECK-NEXT: st1w { z1.d }, p0, [x0, #2, mul vl] ; CHECK-NEXT: ret store <vscale x 6 x float> splat(float 1.0), ptr %out ret void @@ -133,8 +134,9 @@ define void @store_nxv12f16(ptr %out) { ; CHECK: // %bb.0: ; CHECK-NEXT: fmov z0.h, #1.00000000 ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: st1h { z0.s }, p0, [x0, #2, mul vl] +; CHECK-NEXT: uunpklo z1.s, z0.h ; CHECK-NEXT: str z0, [x0] +; CHECK-NEXT: st1h { z1.s }, p0, [x0, #2, mul vl] ; CHECK-NEXT: ret store <vscale x 12 x half> splat(half 1.0), ptr %out ret void diff --git a/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv64.ll index 0e43cbf0f4518..2d5216c97d397 100644 --- a/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv64.ll @@ -761,13 +761,10 @@ define <vscale x 8 x i64> @insertelt_nxv8i64_idx(<vscale x 8 x i64> %v, i64 %elt define <vscale x 4 x i32> @insertelt_nxv4i32_zeroinitializer_0(i32 
%x) { ; CHECK-LABEL: insertelt_nxv4i32_zeroinitializer_0: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; CHECK-NEXT: vmv.v.i v10, 0 -; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, ma -; CHECK-NEXT: vmv.s.x v10, a0 -; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma ; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, tu, ma +; CHECK-NEXT: vmv.s.x v8, a0 ; CHECK-NEXT: ret %v = insertelement <vscale x 4 x i32> zeroinitializer, i32 %x, i64 0 ret <vscale x 4 x i32> %v @@ -776,14 +773,11 @@ define <vscale x 4 x i32> @insertelt_nxv4i32_zeroinitializer_0(i32 %x) { define <vscale x 4 x i32> @insertelt_imm_nxv4i32_zeroinitializer_0(i32 %x) { ; CHECK-LABEL: insertelt_imm_nxv4i32_zeroinitializer_0: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma -; CHECK-NEXT: vmv.v.i v10, 0 -; CHECK-NEXT: li a0, 42 -; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, ma -; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: li a0, 42 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, tu, ma +; CHECK-NEXT: vmv.s.x v8, a0 ; CHECK-NEXT: ret %v = insertelement <vscale x 4 x i32> zeroinitializer, i32 42, i64 0 ret <vscale x 4 x i32> %v From 0a9041d982274ad8f8616fc49bc3f0f1de6a4a5e Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Wed, 2 Apr 2025 17:56:01 +0100 Subject: [PATCH 2/3] Workaround AArch64 regressions --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 9 +++++---- .../AArch64/sve-fixed-length-extract-subvector.ll | 12 +++++------- .../AArch64/sve-st1-addressing-mode-reg-imm.ll | 6 ++---- 3 files changed, 12 insertions(+), 15 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index e0be0d83f7513..592d5aebff97c 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -20223,17 +20223,18 @@ performExtractSubvectorCombine(SDNode *N, 
TargetLowering::DAGCombinerInfo &DCI, return SDValue(); EVT VT = N->getValueType(0); - if (!VT.isScalableVector() || VT.getVectorElementType() != MVT::i1) - return SDValue(); - SDValue V = N->getOperand(0); + if (VT.isScalableVector() != V->getValueType(0).isScalableVector()) + return SDValue(); + // NOTE: This combine exists in DAGCombiner, but that version's legality check // blocks this combine because the non-const case requires custom lowering. + // We also want to perform it even when the splat has multiple uses. // // ty1 extract_vector(ty2 splat(const))) -> ty1 splat(const) if (V.getOpcode() == ISD::SPLAT_VECTOR) - if (isa<ConstantSDNode>(V.getOperand(0))) + if (isa<ConstantSDNode, ConstantFPSDNode>(V.getOperand(0))) return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V.getOperand(0)); return SDValue(); diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-extract-subvector.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-extract-subvector.ll index ae4482d8c5e04..bda7ff9115e09 100644 --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-extract-subvector.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-extract-subvector.ll @@ -547,15 +547,13 @@ define void @extract_subvector_legalization_v8i32() vscale_range(2,2) #0 { ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: adrp x8, .LCPI40_0 ; CHECK-NEXT: add x8, x8, :lo12:.LCPI40_0 -; CHECK-NEXT: mov z1.s, #0 // =0x0 +; CHECK-NEXT: movi v2.2d, #0000000000000000 ; CHECK-NEXT: ptrue p1.d ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8] -; CHECK-NEXT: mov z2.d, z1.d -; CHECK-NEXT: mov z3.d, z0.d -; CHECK-NEXT: ext z2.b, z2.b, z1.b, #16 -; CHECK-NEXT: ext z3.b, z3.b, z0.b, #16 -; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s -; CHECK-NEXT: cmeq v1.4s, v3.4s, v2.4s +; CHECK-NEXT: mov z1.d, z0.d +; CHECK-NEXT: ext z1.b, z1.b, z0.b, #16 +; CHECK-NEXT: cmeq v0.4s, v0.4s, v2.4s +; CHECK-NEXT: cmeq v1.4s, v1.4s, v2.4s ; CHECK-NEXT: sunpklo z0.d, z0.s ; CHECK-NEXT: sunpklo z1.d, z1.s ; CHECK-NEXT: cmpne p0.d, p1/z, z1.d, #0 diff --git a/llvm/test/CodeGen/AArch64/sve-st1-addressing-mode-reg-imm.ll 
b/llvm/test/CodeGen/AArch64/sve-st1-addressing-mode-reg-imm.ll index f39292ecefdf3..71b883f0ef7ec 100644 --- a/llvm/test/CodeGen/AArch64/sve-st1-addressing-mode-reg-imm.ll +++ b/llvm/test/CodeGen/AArch64/sve-st1-addressing-mode-reg-imm.ll @@ -121,9 +121,8 @@ define void @store_nxv6f32(ptr %out) { ; CHECK: // %bb.0: ; CHECK-NEXT: fmov z0.s, #1.00000000 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: uunpklo z1.d, z0.s +; CHECK-NEXT: st1w { z0.d }, p0, [x0, #2, mul vl] ; CHECK-NEXT: str z0, [x0] -; CHECK-NEXT: st1w { z1.d }, p0, [x0, #2, mul vl] ; CHECK-NEXT: ret store <vscale x 6 x float> splat(float 1.0), ptr %out ret void @@ -134,9 +133,8 @@ define void @store_nxv12f16(ptr %out) { ; CHECK: // %bb.0: ; CHECK-NEXT: fmov z0.h, #1.00000000 ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: uunpklo z1.s, z0.h +; CHECK-NEXT: st1h { z0.s }, p0, [x0, #2, mul vl] ; CHECK-NEXT: str z0, [x0] -; CHECK-NEXT: st1h { z1.s }, p0, [x0, #2, mul vl] ; CHECK-NEXT: ret store <vscale x 12 x half> splat(half 1.0), ptr %out ret void From e0f4ea362f29dbb30d1b29fa72836fc21cee3576 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Wed, 2 Apr 2025 21:17:16 +0100 Subject: [PATCH 3/3] Demorgan --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 9f0a1ecbe27fa..bb02505740a5d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -25382,11 +25382,11 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) { // ty1 extract_vector(ty2 splat(V))) -> ty1 splat(V) if (V.getOpcode() == ISD::SPLAT_VECTOR) - if (DAG.isConstantValueOfAnyType(V.getOperand(0)) || V.hasOneUse()) - if (!TLI.isExtractSubvectorCheap(NVT, V.getValueType(), ExtIdx) || - V.hasOneUse()) - if (!LegalOperations || TLI.isOperationLegal(ISD::SPLAT_VECTOR, NVT)) - return DAG.getSplatVector(NVT, DL, V.getOperand(0)); + if ((DAG.isConstantValueOfAnyType(V.getOperand(0)) && 
+ !TLI.isExtractSubvectorCheap(NVT, V.getValueType(), ExtIdx)) || + V.hasOneUse()) + if (!LegalOperations || TLI.isOperationLegal(ISD::SPLAT_VECTOR, NVT)) + return DAG.getSplatVector(NVT, DL, V.getOperand(0)); // extract_subvector(insert_subvector(x,y,c1),c2) // --> extract_subvector(y,c2-c1)