Skip to content

Commit 1ece27b

Browse files
committed
[SDAG] Handle insert_subvector in isKnownNeverNaN
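Propagate nnan across insert_subvector: the result is known never-NaN when both the base vector (operand 0) and the inserted subvector (operand 1) are known never-NaN. Also treat undef values as never NaN.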
1 parent 1b5c3fa commit 1ece27b

File tree

5 files changed

+58
-80
lines changed

5 files changed

+58
-80
lines changed

Diff for: llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

+6
Original file line numberDiff line numberDiff line change
@@ -5625,6 +5625,9 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const
56255625
(SNaN && !C->getValueAPF().isSignaling());
56265626
}
56275627

5628+
if (Op.isUndef())
5629+
return true;
5630+
56285631
unsigned Opcode = Op.getOpcode();
56295632
switch (Opcode) {
56305633
case ISD::FADD:
@@ -5727,6 +5730,9 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const
57275730
case ISD::EXTRACT_SUBVECTOR: {
57285731
return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1);
57295732
}
5733+
case ISD::INSERT_SUBVECTOR:
5734+
return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1) &&
5735+
isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1);
57305736
case ISD::BUILD_VECTOR: {
57315737
for (const SDValue &Opnd : Op->ops())
57325738
if (!isKnownNeverNaN(Opnd, SNaN, Depth + 1))

Diff for: llvm/test/CodeGen/AMDGPU/clamp.ll

+17-17
Original file line numberDiff line numberDiff line change
@@ -2986,14 +2986,14 @@ define amdgpu_kernel void @v_clamp_v2f16_undef_elt(ptr addrspace(1) %out, ptr ad
29862986
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
29872987
; GFX6-NEXT: s_mov_b64 s[4:5], s[2:3]
29882988
; GFX6-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
2989-
; GFX6-NEXT: v_mov_b32_e32 v4, 0x7fc00000
29902989
; GFX6-NEXT: s_mov_b64 s[2:3], s[6:7]
29912990
; GFX6-NEXT: s_waitcnt vmcnt(0)
29922991
; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v2
2993-
; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2
29942992
; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3
2995-
; GFX6-NEXT: v_max_f32_e32 v2, 0x7fc00000, v2
2996-
; GFX6-NEXT: v_med3_f32 v3, v3, 0, v4
2993+
; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2
2994+
; GFX6-NEXT: v_max_f32_e32 v3, 0, v3
2995+
; GFX6-NEXT: v_max_f32_e32 v2, s0, v2
2996+
; GFX6-NEXT: v_min_f32_e32 v3, s0, v3
29972997
; GFX6-NEXT: v_cvt_f16_f32_e32 v3, v3
29982998
; GFX6-NEXT: v_min_f32_e32 v2, 1.0, v2
29992999
; GFX6-NEXT: v_cvt_f16_f32_e32 v2, v2
@@ -3006,20 +3006,20 @@ define amdgpu_kernel void @v_clamp_v2f16_undef_elt(ptr addrspace(1) %out, ptr ad
30063006
; GFX8: ; %bb.0:
30073007
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
30083008
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 2, v0
3009-
; GFX8-NEXT: v_mov_b32_e32 v4, 0x7e00
30103009
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
30113010
; GFX8-NEXT: v_mov_b32_e32 v1, s3
30123011
; GFX8-NEXT: v_add_u32_e32 v0, vcc, s2, v2
30133012
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
30143013
; GFX8-NEXT: flat_load_dword v3, v[0:1]
30153014
; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v2
3015+
; GFX8-NEXT: v_mov_b32_e32 v4, s0
30163016
; GFX8-NEXT: v_mov_b32_e32 v1, s1
30173017
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
30183018
; GFX8-NEXT: s_waitcnt vmcnt(0)
30193019
; GFX8-NEXT: v_max_f16_sdwa v2, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
30203020
; GFX8-NEXT: v_max_f16_e32 v3, v3, v3
30213021
; GFX8-NEXT: v_max_f16_e32 v2, 0, v2
3022-
; GFX8-NEXT: v_max_f16_e32 v3, 0x7e00, v3
3022+
; GFX8-NEXT: v_max_f16_e32 v3, s0, v3
30233023
; GFX8-NEXT: v_min_f16_e32 v3, 1.0, v3
30243024
; GFX8-NEXT: v_min_f16_sdwa v2, v2, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
30253025
; GFX8-NEXT: v_or_b32_e32 v2, v3, v2
@@ -3747,16 +3747,16 @@ define amdgpu_kernel void @v_clamp_v2f16_undef_limit_elts0(ptr addrspace(1) %out
37473747
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
37483748
; GFX6-NEXT: s_mov_b64 s[4:5], s[2:3]
37493749
; GFX6-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
3750-
; GFX6-NEXT: v_mov_b32_e32 v4, 0x7fc00000
37513750
; GFX6-NEXT: s_mov_b64 s[2:3], s[6:7]
37523751
; GFX6-NEXT: s_waitcnt vmcnt(0)
37533752
; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v2
37543753
; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3
37553754
; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2
3756-
; GFX6-NEXT: v_max_f32_e32 v3, 0x7fc00000, v3
3755+
; GFX6-NEXT: v_max_f32_e32 v3, s0, v3
3756+
; GFX6-NEXT: v_max_f32_e32 v2, 0, v2
37573757
; GFX6-NEXT: v_min_f32_e32 v3, 1.0, v3
37583758
; GFX6-NEXT: v_cvt_f16_f32_e32 v3, v3
3759-
; GFX6-NEXT: v_med3_f32 v2, v2, 0, v4
3759+
; GFX6-NEXT: v_min_f32_e32 v2, s0, v2
37603760
; GFX6-NEXT: v_cvt_f16_f32_e32 v2, v2
37613761
; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3
37623762
; GFX6-NEXT: v_or_b32_e32 v2, v2, v3
@@ -3779,9 +3779,9 @@ define amdgpu_kernel void @v_clamp_v2f16_undef_limit_elts0(ptr addrspace(1) %out
37793779
; GFX8-NEXT: s_waitcnt vmcnt(0)
37803780
; GFX8-NEXT: v_max_f16_sdwa v2, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
37813781
; GFX8-NEXT: v_max_f16_e32 v3, v3, v3
3782-
; GFX8-NEXT: v_max_f16_e32 v2, 0x7e00, v2
3782+
; GFX8-NEXT: v_max_f16_e32 v2, s0, v2
37833783
; GFX8-NEXT: v_max_f16_e32 v3, 0, v3
3784-
; GFX8-NEXT: v_min_f16_e32 v3, 0x7e00, v3
3784+
; GFX8-NEXT: v_min_f16_e32 v3, s0, v3
37853785
; GFX8-NEXT: v_min_f16_sdwa v2, v2, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
37863786
; GFX8-NEXT: v_or_b32_e32 v2, v3, v2
37873787
; GFX8-NEXT: flat_store_dword v[0:1], v2
@@ -3845,14 +3845,14 @@ define amdgpu_kernel void @v_clamp_v2f16_undef_limit_elts1(ptr addrspace(1) %out
38453845
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
38463846
; GFX6-NEXT: s_mov_b64 s[4:5], s[2:3]
38473847
; GFX6-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
3848-
; GFX6-NEXT: v_mov_b32_e32 v4, 0x7fc00000
38493848
; GFX6-NEXT: s_mov_b64 s[2:3], s[6:7]
38503849
; GFX6-NEXT: s_waitcnt vmcnt(0)
38513850
; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v2
3852-
; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2
38533851
; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3
3854-
; GFX6-NEXT: v_max_f32_e32 v2, 0x7fc00000, v2
3855-
; GFX6-NEXT: v_med3_f32 v3, v3, 0, v4
3852+
; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2
3853+
; GFX6-NEXT: v_max_f32_e32 v3, 0, v3
3854+
; GFX6-NEXT: v_max_f32_e32 v2, s0, v2
3855+
; GFX6-NEXT: v_min_f32_e32 v3, s0, v3
38563856
; GFX6-NEXT: v_cvt_f16_f32_e32 v3, v3
38573857
; GFX6-NEXT: v_min_f32_e32 v2, 1.0, v2
38583858
; GFX6-NEXT: v_cvt_f16_f32_e32 v2, v2
@@ -3865,20 +3865,20 @@ define amdgpu_kernel void @v_clamp_v2f16_undef_limit_elts1(ptr addrspace(1) %out
38653865
; GFX8: ; %bb.0:
38663866
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
38673867
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 2, v0
3868-
; GFX8-NEXT: v_mov_b32_e32 v4, 0x7e00
38693868
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
38703869
; GFX8-NEXT: v_mov_b32_e32 v1, s3
38713870
; GFX8-NEXT: v_add_u32_e32 v0, vcc, s2, v2
38723871
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
38733872
; GFX8-NEXT: flat_load_dword v3, v[0:1]
38743873
; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v2
3874+
; GFX8-NEXT: v_mov_b32_e32 v4, s0
38753875
; GFX8-NEXT: v_mov_b32_e32 v1, s1
38763876
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
38773877
; GFX8-NEXT: s_waitcnt vmcnt(0)
38783878
; GFX8-NEXT: v_max_f16_sdwa v2, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
38793879
; GFX8-NEXT: v_max_f16_e32 v3, v3, v3
38803880
; GFX8-NEXT: v_max_f16_e32 v2, 0, v2
3881-
; GFX8-NEXT: v_max_f16_e32 v3, 0x7e00, v3
3881+
; GFX8-NEXT: v_max_f16_e32 v3, s0, v3
38823882
; GFX8-NEXT: v_min_f16_e32 v3, 1.0, v3
38833883
; GFX8-NEXT: v_min_f16_sdwa v2, v2, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
38843884
; GFX8-NEXT: v_or_b32_e32 v2, v3, v2

Diff for: llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll

+31-55
Original file line numberDiff line numberDiff line change
@@ -1071,55 +1071,51 @@ define <3 x half> @v_mad_mix_v3f32_clamp_postcvt(<3 x half> %src0, <3 x half> %s
10711071
; SDAG-GFX1100-TRUE16-LABEL: v_mad_mix_v3f32_clamp_postcvt:
10721072
; SDAG-GFX1100-TRUE16: ; %bb.0:
10731073
; SDAG-GFX1100-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1074-
; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
1075-
; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
1074+
; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v6.l, v3.l
10761075
; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v3.l, v0.l
1077-
; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v5.l, v2.l
1078-
; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
1079-
; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v2.h, v6.l
1080-
; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l
1081-
; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
1082-
; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v2, v3, v5, v4 op_sel_hi:[1,1,1]
1083-
; SDAG-GFX1100-TRUE16-NEXT: v_pack_b32_f16 v1, v0.l, 0
1076+
; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v7.l, v2.l
1077+
; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v8.l, v4.l
1078+
; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
1079+
; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v1, v1, v6, v5 op_sel_hi:[1,1,1]
1080+
; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v3, v3, v7, v8 op_sel_hi:[1,1,1] clamp
10841081
; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
1085-
; SDAG-GFX1100-TRUE16-NEXT: v_pk_max_f16 v0, v2, v2 clamp
10861082
; SDAG-GFX1100-TRUE16-NEXT: v_pk_max_f16 v1, v1, v1 clamp
1083+
; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1084+
; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
1085+
; SDAG-GFX1100-TRUE16-NEXT: v_mov_b32_e32 v0, v3
10871086
; SDAG-GFX1100-TRUE16-NEXT: s_setpc_b64 s[30:31]
10881087
;
10891088
; SDAG-GFX1100-FAKE16-LABEL: v_mad_mix_v3f32_clamp_postcvt:
10901089
; SDAG-GFX1100-FAKE16: ; %bb.0:
10911090
; SDAG-GFX1100-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1092-
; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1]
1091+
; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
10931092
; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
10941093
; SDAG-GFX1100-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
1095-
; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
1096-
; SDAG-GFX1100-FAKE16-NEXT: v_pack_b32_f16 v1, v1, 0
1097-
; SDAG-GFX1100-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
1098-
; SDAG-GFX1100-FAKE16-NEXT: v_pk_max_f16 v0, v6, v6 clamp
1094+
; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
10991095
; SDAG-GFX1100-FAKE16-NEXT: v_pk_max_f16 v1, v1, v1 clamp
1096+
; SDAG-GFX1100-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
1097+
; SDAG-GFX1100-FAKE16-NEXT: v_mov_b32_e32 v0, v6
11001098
; SDAG-GFX1100-FAKE16-NEXT: s_setpc_b64 s[30:31]
11011099
;
1102-
; SDAG-GFX900-LABEL: v_mad_mix_v3f32_clamp_postcvt:
1103-
; SDAG-GFX900: ; %bb.0:
1104-
; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1105-
; SDAG-GFX900-NEXT: v_mad_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1]
1106-
; SDAG-GFX900-NEXT: v_mad_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
1107-
; SDAG-GFX900-NEXT: v_pack_b32_f16 v1, v1, 0
1108-
; SDAG-GFX900-NEXT: v_mad_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
1109-
; SDAG-GFX900-NEXT: v_pk_max_f16 v0, v6, v6 clamp
1110-
; SDAG-GFX900-NEXT: v_pk_max_f16 v1, v1, v1 clamp
1111-
; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31]
1100+
; GFX900-LABEL: v_mad_mix_v3f32_clamp_postcvt:
1101+
; GFX900: ; %bb.0:
1102+
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1103+
; GFX900-NEXT: v_mad_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
1104+
; GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] clamp
1105+
; GFX900-NEXT: v_mad_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1106+
; GFX900-NEXT: v_pk_max_f16 v1, v1, v1 clamp
1107+
; GFX900-NEXT: v_mov_b32_e32 v0, v3
1108+
; GFX900-NEXT: s_setpc_b64 s[30:31]
11121109
;
1113-
; SDAG-GFX906-LABEL: v_mad_mix_v3f32_clamp_postcvt:
1114-
; SDAG-GFX906: ; %bb.0:
1115-
; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1116-
; SDAG-GFX906-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1]
1117-
; SDAG-GFX906-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
1118-
; SDAG-GFX906-NEXT: v_pack_b32_f16 v1, v1, 0
1119-
; SDAG-GFX906-NEXT: v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
1120-
; SDAG-GFX906-NEXT: v_pk_max_f16 v0, v6, v6 clamp
1121-
; SDAG-GFX906-NEXT: v_pk_max_f16 v1, v1, v1 clamp
1122-
; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
1110+
; GFX906-LABEL: v_mad_mix_v3f32_clamp_postcvt:
1111+
; GFX906: ; %bb.0:
1112+
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1113+
; GFX906-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
1114+
; GFX906-NEXT: v_fma_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] clamp
1115+
; GFX906-NEXT: v_fma_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1116+
; GFX906-NEXT: v_pk_max_f16 v1, v1, v1 clamp
1117+
; GFX906-NEXT: v_mov_b32_e32 v0, v3
1118+
; GFX906-NEXT: s_setpc_b64 s[30:31]
11231119
;
11241120
; SDAG-VI-LABEL: v_mad_mix_v3f32_clamp_postcvt:
11251121
; SDAG-VI: ; %bb.0:
@@ -1193,26 +1189,6 @@ define <3 x half> @v_mad_mix_v3f32_clamp_postcvt(<3 x half> %src0, <3 x half> %s
11931189
; GISEL-GFX1100-NEXT: v_mov_b32_e32 v0, v6
11941190
; GISEL-GFX1100-NEXT: s_setpc_b64 s[30:31]
11951191
;
1196-
; GISEL-GFX900-LABEL: v_mad_mix_v3f32_clamp_postcvt:
1197-
; GISEL-GFX900: ; %bb.0:
1198-
; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1199-
; GISEL-GFX900-NEXT: v_mad_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
1200-
; GISEL-GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] clamp
1201-
; GISEL-GFX900-NEXT: v_mad_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1202-
; GISEL-GFX900-NEXT: v_pk_max_f16 v1, v1, v1 clamp
1203-
; GISEL-GFX900-NEXT: v_mov_b32_e32 v0, v3
1204-
; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31]
1205-
;
1206-
; GISEL-GFX906-LABEL: v_mad_mix_v3f32_clamp_postcvt:
1207-
; GISEL-GFX906: ; %bb.0:
1208-
; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1209-
; GISEL-GFX906-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
1210-
; GISEL-GFX906-NEXT: v_fma_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] clamp
1211-
; GISEL-GFX906-NEXT: v_fma_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1212-
; GISEL-GFX906-NEXT: v_pk_max_f16 v1, v1, v1 clamp
1213-
; GISEL-GFX906-NEXT: v_mov_b32_e32 v0, v3
1214-
; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
1215-
;
12161192
; GISEL-VI-LABEL: v_mad_mix_v3f32_clamp_postcvt:
12171193
; GISEL-VI: ; %bb.0:
12181194
; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)

Diff for: llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll

+2-4
Original file line numberDiff line numberDiff line change
@@ -367,12 +367,10 @@ define <4 x half> @vfmax_v2f16_vv_nnan_insert_subvector(<2 x half> %a, <2 x half
367367
; ZVFH-NEXT: vfadd.vv v8, v8, v8
368368
; ZVFH-NEXT: vfadd.vv v9, v9, v9
369369
; ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
370-
; ZVFH-NEXT: vslideup.vi v8, v9, 2
371-
; ZVFH-NEXT: vmfeq.vv v0, v8, v8
372-
; ZVFH-NEXT: vmerge.vvm v9, v8, v10, v0
373370
; ZVFH-NEXT: vmfeq.vv v0, v10, v10
371+
; ZVFH-NEXT: vslideup.vi v8, v9, 2
374372
; ZVFH-NEXT: vmerge.vvm v8, v10, v8, v0
375-
; ZVFH-NEXT: vfmax.vv v8, v8, v9
373+
; ZVFH-NEXT: vfmax.vv v8, v8, v10
376374
; ZVFH-NEXT: ret
377375
;
378376
; ZVFHMIN-LABEL: vfmax_v2f16_vv_nnan_insert_subvector:

Diff for: llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll

+2-4
Original file line numberDiff line numberDiff line change
@@ -367,12 +367,10 @@ define <4 x half> @vfmin_v2f16_vv_nnan_insert_subvector(<2 x half> %a, <2 x half
367367
; ZVFH-NEXT: vfadd.vv v8, v8, v8
368368
; ZVFH-NEXT: vfadd.vv v9, v9, v9
369369
; ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
370-
; ZVFH-NEXT: vslideup.vi v8, v9, 2
371-
; ZVFH-NEXT: vmfeq.vv v0, v8, v8
372-
; ZVFH-NEXT: vmerge.vvm v9, v8, v10, v0
373370
; ZVFH-NEXT: vmfeq.vv v0, v10, v10
371+
; ZVFH-NEXT: vslideup.vi v8, v9, 2
374372
; ZVFH-NEXT: vmerge.vvm v8, v10, v8, v0
375-
; ZVFH-NEXT: vfmin.vv v8, v8, v9
373+
; ZVFH-NEXT: vfmin.vv v8, v8, v10
376374
; ZVFH-NEXT: ret
377375
;
378376
; ZVFHMIN-LABEL: vfmin_v2f16_vv_nnan_insert_subvector:

0 commit comments

Comments
 (0)