Skip to content

Commit 2654579

Browse files
author
v01dxyz
committed
Count leading ones promoted type optimisation: Support VP_XOR
1 parent 8b95ef8 commit 2654579

File tree

2 files changed

+118
-4
lines changed

2 files changed

+118
-4
lines changed

Diff for: llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp

+29-4
Original file line numberDiff line numberDiff line change
@@ -651,19 +651,43 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Constant(SDNode *N) {
651651
// (CTLZ (XOR Op -1)) --> (CTLZ_ZERO_UNDEF (XOR (SHIFT (ANYEXTEND Op)
652652
// ShiftAmount)
653653
// -1))
654+
//
655+
// The following Vector Predicated patterns will also be transformed
656+
// similarly to above using VP_CTLZ_ZERO_UNDEF and VP_XOR:
657+
//
658+
// - (VP_CTLZ (XOR Op -1) Mask VecLen)
659+
// - (VP_CTLZ (VP_XOR Op -1 Mask VecLen) Mask VecLen)
654660
static SDValue ExtendCtlzNot(SDNode *Node, SDLoc &dl, EVT OVT, EVT NVT,
655661
SelectionDAG &DAG) {
656662
SDValue SrcOp;
657-
if (!sd_match(Node->getOperand(0), m_Not(m_Value(SrcOp))))
663+
if (sd_match(Node->getOperand(0), m_Not(m_Value(SrcOp)))) {
664+
} else if (Node->isVPOpcode() &&
665+
Node->getOperand(0).getOpcode() == ISD::VP_XOR) {
666+
SDValue VPXor = Node->getOperand(0);
667+
668+
SDValue Mask = Node->getOperand(1);
669+
SDValue EVL = Node->getOperand(2);
670+
671+
SDValue VPXorMask = VPXor->getOperand(2);
672+
SDValue VPXorEVL = VPXor->getOperand(3);
673+
674+
if (VPXorMask != Mask || VPXorEVL != EVL)
675+
return SDValue();
676+
677+
if (isAllOnesOrAllOnesSplat(VPXor->getOperand(1))) {
678+
SrcOp = VPXor->getOperand(0);
679+
} else if (isAllOnesOrAllOnesSplat(VPXor->getOperand(0))) {
680+
SrcOp = VPXor->getOperand(1);
681+
} else
682+
return SDValue();
683+
} else
658684
return SDValue();
659685

660686
SDValue ExtSrc = DAG.getNode(ISD::ANY_EXTEND, dl, NVT, SrcOp);
661687
unsigned SHLAmount = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits();
662688
SDValue ShiftConst =
663689
DAG.getShiftAmountConstant(SHLAmount, ExtSrc.getValueType(), dl);
664690

665-
SDValue NCstOp =
666-
DAG.getConstant(APInt::getAllOnes(NVT.getScalarSizeInBits()), dl, NVT);
667691
if (!Node->isVPOpcode()) {
668692
SDValue LShift = DAG.getNode(ISD::SHL, dl, NVT, ExtSrc, ShiftConst);
669693
SDValue Not = DAG.getNOT(dl, LShift, NVT);
@@ -675,7 +699,8 @@ static SDValue ExtendCtlzNot(SDNode *Node, SDLoc &dl, EVT OVT, EVT NVT,
675699

676700
SDValue LShift =
677701
DAG.getNode(ISD::VP_SHL, dl, NVT, ExtSrc, ShiftConst, Mask, EVL);
678-
SDValue Not = DAG.getNode(ISD::VP_XOR, dl, NVT, LShift, NCstOp, Mask, EVL);
702+
SDValue Not = DAG.getNode(ISD::VP_XOR, dl, NVT, LShift,
703+
DAG.getAllOnesConstant(dl, NVT), Mask, EVL);
679704
return DAG.getNode(ISD::VP_CTLZ_ZERO_UNDEF, dl, NVT, Not, Mask, EVL);
680705
}
681706

Diff for: llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll

+89
Original file line numberDiff line numberDiff line change
@@ -2676,6 +2676,95 @@ define <vscale x 1 x i9> @vp_ctlo_zero_undef_nxv1i9(<vscale x 1 x i9> %va, <vsca
26762676
%v = call <vscale x 1 x i9> @llvm.vp.ctlz.nxv1i9(<vscale x 1 x i9> %va.not, i1 true, <vscale x 1 x i1> %m, i32 %evl)
26772677
ret <vscale x 1 x i9> %v
26782678
}
2679+
2680+
define <vscale x 1 x i9> @vp_ctlo_nxv1i9_vp_xor(<vscale x 1 x i9> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
2681+
; CHECK-LABEL: vp_ctlo_nxv1i9_vp_xor:
2682+
; CHECK: # %bb.0:
2683+
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
2684+
; CHECK-NEXT: vsll.vi v8, v8, 7, v0.t
2685+
; CHECK-NEXT: vnot.v v8, v8, v0.t
2686+
; CHECK-NEXT: vfwcvt.f.xu.v v9, v8, v0.t
2687+
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
2688+
; CHECK-NEXT: vsrl.vi v8, v9, 23, v0.t
2689+
; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
2690+
; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t
2691+
; CHECK-NEXT: li a0, 142
2692+
; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
2693+
; CHECK-NEXT: ret
2694+
;
2695+
; CHECK-ZVBB-LABEL: vp_ctlo_nxv1i9_vp_xor:
2696+
; CHECK-ZVBB: # %bb.0:
2697+
; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
2698+
; CHECK-ZVBB-NEXT: vsll.vi v8, v8, 7, v0.t
2699+
; CHECK-ZVBB-NEXT: vnot.v v8, v8, v0.t
2700+
; CHECK-ZVBB-NEXT: vclz.v v8, v8, v0.t
2701+
; CHECK-ZVBB-NEXT: ret
2702+
%va.not = call <vscale x 1 x i9> @llvm.vp.xor.nxv1i9(<vscale x 1 x i9> %va, <vscale x 1 x i9> splat (i9 -1), <vscale x 1 x i1> %m, i32 %evl)
2703+
%v = call <vscale x 1 x i9> @llvm.vp.ctlz.nxv1i9(<vscale x 1 x i9> %va.not, i1 false, <vscale x 1 x i1> %m, i32 %evl)
2704+
ret <vscale x 1 x i9> %v
2705+
}
2706+
2707+
define <vscale x 1 x i9> @vp_ctlo_zero_undef_nxv1i9_vp_xor(<vscale x 1 x i9> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
2708+
; CHECK-LABEL: vp_ctlo_zero_undef_nxv1i9_vp_xor:
2709+
; CHECK: # %bb.0:
2710+
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
2711+
; CHECK-NEXT: vsll.vi v8, v8, 7, v0.t
2712+
; CHECK-NEXT: vnot.v v8, v8, v0.t
2713+
; CHECK-NEXT: vfwcvt.f.xu.v v9, v8, v0.t
2714+
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
2715+
; CHECK-NEXT: vsrl.vi v8, v9, 23, v0.t
2716+
; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
2717+
; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t
2718+
; CHECK-NEXT: li a0, 142
2719+
; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
2720+
; CHECK-NEXT: ret
2721+
;
2722+
; CHECK-ZVBB-LABEL: vp_ctlo_zero_undef_nxv1i9_vp_xor:
2723+
; CHECK-ZVBB: # %bb.0:
2724+
; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
2725+
; CHECK-ZVBB-NEXT: vsll.vi v8, v8, 7, v0.t
2726+
; CHECK-ZVBB-NEXT: vnot.v v8, v8, v0.t
2727+
; CHECK-ZVBB-NEXT: vclz.v v8, v8, v0.t
2728+
; CHECK-ZVBB-NEXT: ret
2729+
%va.not = call <vscale x 1 x i9> @llvm.vp.xor.nxv1i9(<vscale x 1 x i9> %va, <vscale x 1 x i9> splat (i9 -1), <vscale x 1 x i1> %m, i32 %evl)
2730+
%v = call <vscale x 1 x i9> @llvm.vp.ctlz.nxv1i9(<vscale x 1 x i9> %va.not, i1 true, <vscale x 1 x i1> %m, i32 %evl)
2731+
ret <vscale x 1 x i9> %v
2732+
}
2733+
2734+
define <vscale x 1 x i9> @vp_ctlo_zero_nxv1i9_unpredicated_ctlz_with_vp_xor(<vscale x 1 x i9> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
2735+
; CHECK-LABEL: vp_ctlo_zero_nxv1i9_unpredicated_ctlz_with_vp_xor:
2736+
; CHECK: # %bb.0:
2737+
; CHECK-NEXT: li a1, 511
2738+
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
2739+
; CHECK-NEXT: vxor.vx v8, v8, a1, v0.t
2740+
; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
2741+
; CHECK-NEXT: vand.vx v8, v8, a1
2742+
; CHECK-NEXT: vfwcvt.f.xu.v v9, v8
2743+
; CHECK-NEXT: vnsrl.wi v8, v9, 23
2744+
; CHECK-NEXT: li a0, 142
2745+
; CHECK-NEXT: vrsub.vx v8, v8, a0
2746+
; CHECK-NEXT: li a0, 16
2747+
; CHECK-NEXT: vminu.vx v8, v8, a0
2748+
; CHECK-NEXT: li a0, 7
2749+
; CHECK-NEXT: vsub.vx v8, v8, a0
2750+
; CHECK-NEXT: ret
2751+
;
2752+
; CHECK-ZVBB-LABEL: vp_ctlo_zero_nxv1i9_unpredicated_ctlz_with_vp_xor:
2753+
; CHECK-ZVBB: # %bb.0:
2754+
; CHECK-ZVBB-NEXT: li a1, 511
2755+
; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
2756+
; CHECK-ZVBB-NEXT: vxor.vx v8, v8, a1, v0.t
2757+
; CHECK-ZVBB-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
2758+
; CHECK-ZVBB-NEXT: vand.vx v8, v8, a1
2759+
; CHECK-ZVBB-NEXT: vclz.v v8, v8
2760+
; CHECK-ZVBB-NEXT: li a0, 7
2761+
; CHECK-ZVBB-NEXT: vsub.vx v8, v8, a0
2762+
; CHECK-ZVBB-NEXT: ret
2763+
%va.not = call <vscale x 1 x i9> @llvm.vp.xor.nxv1i9(<vscale x 1 x i9> %va, <vscale x 1 x i9> splat (i9 -1), <vscale x 1 x i1> %m, i32 %evl)
2764+
%v = call <vscale x 1 x i9> @llvm.ctlz(<vscale x 1 x i9> %va.not, i1 false)
2765+
ret <vscale x 1 x i9> %v
2766+
}
2767+
26792768
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
26802769
; RV32: {{.*}}
26812770
; RV64: {{.*}}

0 commit comments

Comments
 (0)