Skip to content

Commit 10b48e4

Browse files
[InstCombine] Combine extract from get_active_lane_mask where all lanes inactive (llvm#183329)
When extracting a subvector from the result of a get_active_lane_mask, return a constant zero vector if it can be proven that all extracted lanes will be inactive. For example, the result of the extract below is a subvector in which every lane is inactive if X and Y are constants and `Y * VScale >= X` (the implementation evaluates this using the minimum vscale taken from the function's attributes): vector.extract(get.active.lane.mask(Start, X), Y)
1 parent 7a5ba65 commit 10b48e4

2 files changed

Lines changed: 50 additions & 0 deletions

File tree

llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3927,6 +3927,16 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
39273927
return replaceOperand(CI, 0, InsertTuple);
39283928
}
39293929

3930+
ConstantInt *ALMUpperBound;
3931+
if (match(Vec, m_Intrinsic<Intrinsic::get_active_lane_mask>(
3932+
m_Value(), m_ConstantInt(ALMUpperBound)))) {
3933+
const auto &Attrs = II->getFunction()->getAttributes().getFnAttrs();
3934+
unsigned VScaleMin = Attrs.getVScaleRangeMin();
3935+
if (ExtractIdx * VScaleMin >= ALMUpperBound->getZExtValue())
3936+
return replaceInstUsesWith(CI,
3937+
ConstantVector::getNullValue(ReturnType));
3938+
}
3939+
39303940
auto *DstTy = dyn_cast<VectorType>(ReturnType);
39313941
auto *VecTy = dyn_cast<VectorType>(Vec->getType());
39323942

llvm/test/Transforms/InstCombine/get_active_lane_mask.ll

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,3 +36,43 @@ define <vscale x 4 x i1> @bail_lhs_is_zero() {
3636
%mask = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 4)
3737
ret <vscale x 4 x i1> %mask
3838
}
39+
40+
; Positive test: a fixed-width <4 x i1> subvector is extracted at index 8 from
; a lane mask built with base 0 and upper bound 7. The CHECK lines expect the
; extract to fold to an all-false constant.
; NOTE(review): this function carries no vscale_range attribute; the minimum
; vscale used by the combine in that case is not visible here - confirm.
define <4 x i1> @remove_all_false_subvector() {
41+
; CHECK-LABEL: define <4 x i1> @remove_all_false_subvector() {
42+
; CHECK-NEXT: ret <4 x i1> zeroinitializer
43+
;
44+
%wide.alm = tail call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i32 0, i32 7)
45+
%ext = tail call <4 x i1> @llvm.vector.extract.v4i1.nxv16i1(<vscale x 16 x i1> %wide.alm, i64 8)
46+
ret <4 x i1> %ext
47+
}
48+
49+
; Positive test with a scalable result type: the function declares
; vscale_range(2,16), the extract index is 4 and the lane mask's upper bound
; is 7. With a minimum vscale of 2, index * min-vscale = 8 >= 7, so the CHECK
; lines expect the extract to fold to an all-false constant.
define <vscale x 4 x i1> @remove_all_false_subvector_vscale() vscale_range(2,16) {
50+
; CHECK-LABEL: define <vscale x 4 x i1> @remove_all_false_subvector_vscale(
51+
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
52+
; CHECK-NEXT: ret <vscale x 4 x i1> zeroinitializer
53+
;
54+
%wide.alm = tail call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 7)
55+
%ext = tail call <vscale x 4 x i1> @llvm.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> %wide.alm, i64 4)
56+
ret <vscale x 4 x i1> %ext
57+
}
58+
59+
; Positive test showing the fold does not need a constant base: the lane
; mask's first operand is the function argument %start, only the upper bound
; (1) is constant. The extract index is 2, and the CHECK lines expect an
; all-false constant result.
; NOTE(review): no vscale_range attribute is present here; the minimum vscale
; assumed by the combine in that case is not visible - confirm.
define <vscale x 2 x i1> @active_lane_mask_non_const_start(i64 %start) {
60+
; CHECK-LABEL: define <vscale x 2 x i1> @active_lane_mask_non_const_start(
61+
; CHECK-SAME: i64 [[START:%.*]]) {
62+
; CHECK-NEXT: ret <vscale x 2 x i1> zeroinitializer
63+
;
64+
%wide.alm = tail call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 %start, i64 1)
65+
%ext = tail call <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv4i1(<vscale x 4 x i1> %wide.alm, i64 2)
66+
ret <vscale x 2 x i1> %ext
67+
}
68+
69+
; Negative test: the extract index (4) is below the lane mask's upper bound
; (7), so the extracted subvector may contain active lanes. The CHECK lines
; expect both calls to be left in place.
; NOTE(review): the input spells the lane-mask callee with an .i64 suffix while
; passing i32 operands; the CHECK line expects the .i32 spelling. Presumably
; the name is remangled on load - confirm, or spell the input as .i32.
define <4 x i1> @ext_has_active_lanes() {
70+
; CHECK-LABEL: define <4 x i1> @ext_has_active_lanes() {
71+
; CHECK-NEXT: [[WIDE_ALM:%.*]] = tail call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 0, i32 7)
72+
; CHECK-NEXT: [[EXT:%.*]] = tail call <4 x i1> @llvm.vector.extract.v4i1.nxv16i1(<vscale x 16 x i1> [[WIDE_ALM]], i64 4)
73+
; CHECK-NEXT: ret <4 x i1> [[EXT]]
74+
;
75+
%wide.alm = tail call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i32 0, i32 7)
76+
%ext = tail call <4 x i1> @llvm.vector.extract.v4i1.nxv16i1(<vscale x 16 x i1> %wide.alm, i64 4)
77+
ret <4 x i1> %ext
78+
}

0 commit comments

Comments
 (0)