Skip to content

Commit 8760ea5

Browse files
committed
DAG: Avoid forming shufflevector from a single extract_vector_elt
This avoids regressions in a future AMDGPU commit. Previously, a node of the form (build_vector (extract_vector_elt x), undef), whose elements are freely accessible, would be bloated into a shuffle of one element plus undef, which has much worse combine support than the extract. Alternatively, this could check aggressivelyPreferBuildVectorSources, but I'm not sure it's really different from isExtractVecEltCheap.
1 parent b93cb29 commit 8760ea5

File tree

3 files changed

+29
-14
lines changed

3 files changed

+29
-14
lines changed

Diff for: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

+20-5
Original file line numberDiff line numberDiff line change
@@ -23799,6 +23799,10 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
2379923799
SmallVector<SDValue, 8> VecIn;
2380023800
VecIn.push_back(SDValue());
2380123801

23802+
// If we have a single extract_element with a constant index, track the index
23803+
// value.
23804+
unsigned OneConstExtractIndex = ~0u;
23805+
2380223806
for (unsigned i = 0; i != NumElems; ++i) {
2380323807
SDValue Op = N->getOperand(i);
2380423808

@@ -23816,23 +23820,27 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
2381623820

2381723821
// Not an undef or zero. If the input is something other than an
2381823822
// EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
23819-
if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
23820-
!isa<ConstantSDNode>(Op.getOperand(1)))
23823+
if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2382123824
return SDValue();
23822-
SDValue ExtractedFromVec = Op.getOperand(0);
2382323825

23826+
SDValue ExtractedFromVec = Op.getOperand(0);
2382423827
if (ExtractedFromVec.getValueType().isScalableVector())
2382523828
return SDValue();
23829+
auto *ExtractIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
23830+
if (!ExtractIdx)
23831+
return SDValue();
2382623832

23827-
const APInt &ExtractIdx = Op.getConstantOperandAPInt(1);
23828-
if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
23833+
if (ExtractIdx->getAsAPIntVal().uge(
23834+
ExtractedFromVec.getValueType().getVectorNumElements()))
2382923835
return SDValue();
2383023836

2383123837
// All inputs must have the same element type as the output.
2383223838
if (VT.getVectorElementType() !=
2383323839
ExtractedFromVec.getValueType().getVectorElementType())
2383423840
return SDValue();
2383523841

23842+
OneConstExtractIndex = ExtractIdx->getZExtValue();
23843+
2383623844
// Have we seen this input vector before?
2383723845
// The vectors are expected to be tiny (usually 1 or 2 elements), so using
2383823846
// a map back from SDValues to numbers isn't worth it.
@@ -23855,6 +23863,13 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
2385523863
// VecIn accordingly.
2385623864
bool DidSplitVec = false;
2385723865
if (VecIn.size() == 2) {
23866+
// If we only found a single constant indexed extract_vector_elt feeding the
23867+
// build_vector, do not produce a more complicated shuffle if the extract is
23868+
// cheap.
23869+
if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, VT) &&
23870+
TLI.isExtractVecEltCheap(VT, OneConstExtractIndex))
23871+
return SDValue();
23872+
2385823873
unsigned MaxIndex = 0;
2385923874
unsigned NearestPow2 = 0;
2386023875
SDValue Vec = VecIn.back();

Diff for: llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll

+5-5
Original file line numberDiff line numberDiff line change
@@ -452,11 +452,11 @@ define amdgpu_kernel void @byte8_inselt(ptr addrspace(1) %out, <8 x i8> %vec, i3
452452
; GCN-NEXT: s_and_b32 s6, s4, 0x1010101
453453
; GCN-NEXT: s_andn2_b64 s[2:3], s[2:3], s[4:5]
454454
; GCN-NEXT: s_or_b64 s[2:3], s[6:7], s[2:3]
455-
; GCN-NEXT: v_mov_b32_e32 v3, s1
456-
; GCN-NEXT: v_mov_b32_e32 v0, s2
457-
; GCN-NEXT: v_mov_b32_e32 v1, s3
458-
; GCN-NEXT: v_mov_b32_e32 v2, s0
459-
; GCN-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
455+
; GCN-NEXT: v_mov_b32_e32 v0, s0
456+
; GCN-NEXT: v_mov_b32_e32 v2, s2
457+
; GCN-NEXT: v_mov_b32_e32 v1, s1
458+
; GCN-NEXT: v_mov_b32_e32 v3, s3
459+
; GCN-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
460460
; GCN-NEXT: s_endpgm
461461
entry:
462462
%v = insertelement <8 x i8> %vec, i8 1, i32 %sel

Diff for: llvm/test/CodeGen/X86/sse41.ll

+4-4
Original file line numberDiff line numberDiff line change
@@ -2124,14 +2124,14 @@ define <4 x float> @build_vector_to_shuffle_1(<4 x float> %A) {
21242124
; AVX1-LABEL: build_vector_to_shuffle_1:
21252125
; AVX1: ## %bb.0:
21262126
; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x57,0xc9]
2127-
; AVX1-NEXT: vblendps $10, %xmm0, %xmm1, %xmm0 ## encoding: [0xc4,0xe3,0x71,0x0c,0xc0,0x0a]
2127+
; AVX1-NEXT: vblendps $5, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x05]
21282128
; AVX1-NEXT: ## xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
21292129
; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
21302130
;
21312131
; AVX512-LABEL: build_vector_to_shuffle_1:
21322132
; AVX512: ## %bb.0:
21332133
; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf0,0x57,0xc9]
2134-
; AVX512-NEXT: vblendps $10, %xmm0, %xmm1, %xmm0 ## encoding: [0xc4,0xe3,0x71,0x0c,0xc0,0x0a]
2134+
; AVX512-NEXT: vblendps $5, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x05]
21352135
; AVX512-NEXT: ## xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
21362136
; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
21372137
%vecext = extractelement <4 x float> %A, i32 1
@@ -2152,14 +2152,14 @@ define <4 x float> @build_vector_to_shuffle_2(<4 x float> %A) {
21522152
; AVX1-LABEL: build_vector_to_shuffle_2:
21532153
; AVX1: ## %bb.0:
21542154
; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x57,0xc9]
2155-
; AVX1-NEXT: vblendps $2, %xmm0, %xmm1, %xmm0 ## encoding: [0xc4,0xe3,0x71,0x0c,0xc0,0x02]
2155+
; AVX1-NEXT: vblendps $13, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x0d]
21562156
; AVX1-NEXT: ## xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
21572157
; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
21582158
;
21592159
; AVX512-LABEL: build_vector_to_shuffle_2:
21602160
; AVX512: ## %bb.0:
21612161
; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf0,0x57,0xc9]
2162-
; AVX512-NEXT: vblendps $2, %xmm0, %xmm1, %xmm0 ## encoding: [0xc4,0xe3,0x71,0x0c,0xc0,0x02]
2162+
; AVX512-NEXT: vblendps $13, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x0d]
21632163
; AVX512-NEXT: ## xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
21642164
; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
21652165
%vecext = extractelement <4 x float> %A, i32 1

0 commit comments

Comments
 (0)