diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index dc5c5f38e3bd8..bb02505740a5d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -25382,7 +25382,9 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) { // ty1 extract_vector(ty2 splat(V))) -> ty1 splat(V) if (V.getOpcode() == ISD::SPLAT_VECTOR) - if (DAG.isConstantValueOfAnyType(V.getOperand(0)) || V.hasOneUse()) + if ((DAG.isConstantValueOfAnyType(V.getOperand(0)) && + !TLI.isExtractSubvectorCheap(NVT, V.getValueType(), ExtIdx)) || + V.hasOneUse()) if (!LegalOperations || TLI.isOperationLegal(ISD::SPLAT_VECTOR, NVT)) return DAG.getSplatVector(NVT, DL, V.getOperand(0)); diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index e0be0d83f7513..592d5aebff97c 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -20223,17 +20223,18 @@ performExtractSubvectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, return SDValue(); EVT VT = N->getValueType(0); - if (!VT.isScalableVector() || VT.getVectorElementType() != MVT::i1) - return SDValue(); - SDValue V = N->getOperand(0); + if (VT.isScalableVector() != V->getValueType(0).isScalableVector()) + return SDValue(); + // NOTE: This combine exists in DAGCombiner, but that version's legality check // blocks this combine because the non-const case requires custom lowering. + // We also want to perform it even when the splat has multiple uses. // // ty1 extract_vector(ty2 splat(const))) -> ty1 splat(const) if (V.getOpcode() == ISD::SPLAT_VECTOR) - if (isa(V.getOperand(0))) + if (isa(V.getOperand(0))) return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V.getOperand(0)); return SDValue(); diff --git a/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv64.ll index 0e43cbf0f4518..2d5216c97d397 100644 --- a/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv64.ll @@ -761,13 +761,10 @@ define @insertelt_nxv8i64_idx( %v, i64 %elt define @insertelt_nxv4i32_zeroinitializer_0(i32 %x) { ; CHECK-LABEL: insertelt_nxv4i32_zeroinitializer_0: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; CHECK-NEXT: vmv.v.i v10, 0 -; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, ma -; CHECK-NEXT: vmv.s.x v10, a0 -; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma ; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, tu, ma +; CHECK-NEXT: vmv.s.x v8, a0 ; CHECK-NEXT: ret %v = insertelement zeroinitializer, i32 %x, i64 0 ret %v @@ -776,14 +773,11 @@ define @insertelt_nxv4i32_zeroinitializer_0(i32 %x) { define @insertelt_imm_nxv4i32_zeroinitializer_0(i32 %x) { ; CHECK-LABEL: insertelt_imm_nxv4i32_zeroinitializer_0: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma -; CHECK-NEXT: vmv.v.i v10, 0 -; CHECK-NEXT: li a0, 42 -; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, ma -; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: li a0, 42 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, tu, ma +; CHECK-NEXT: vmv.s.x v8, a0 ; CHECK-NEXT: ret %v = insertelement zeroinitializer, i32 42, i64 0 ret %v