@@ -1071,55 +1071,51 @@ define <3 x half> @v_mad_mix_v3f32_clamp_postcvt(<3 x half> %src0, <3 x half> %s
1071
1071
; SDAG-GFX1100-TRUE16-LABEL: v_mad_mix_v3f32_clamp_postcvt:
1072
1072
; SDAG-GFX1100-TRUE16: ; %bb.0:
1073
1073
; SDAG-GFX1100-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1074
- ; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
1075
- ; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
1074
+ ; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v6.l, v3.l
1076
1075
; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v3.l, v0.l
1077
- ; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v5.l, v2.l
1078
- ; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
1079
- ; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v2.h, v6.l
1080
- ; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l
1081
- ; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
1082
- ; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v2, v3, v5, v4 op_sel_hi:[1,1,1]
1083
- ; SDAG-GFX1100-TRUE16-NEXT: v_pack_b32_f16 v1, v0.l, 0
1076
+ ; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v7.l, v2.l
1077
+ ; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v8.l, v4.l
1078
+ ; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
1079
+ ; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v1, v1, v6, v5 op_sel_hi:[1,1,1]
1080
+ ; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v3, v3, v7, v8 op_sel_hi:[1,1,1] clamp
1084
1081
; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
1085
- ; SDAG-GFX1100-TRUE16-NEXT: v_pk_max_f16 v0, v2, v2 clamp
1086
1082
; SDAG-GFX1100-TRUE16-NEXT: v_pk_max_f16 v1, v1, v1 clamp
1083
+ ; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1084
+ ; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
1085
+ ; SDAG-GFX1100-TRUE16-NEXT: v_mov_b32_e32 v0, v3
1087
1086
; SDAG-GFX1100-TRUE16-NEXT: s_setpc_b64 s[30:31]
1088
1087
;
1089
1088
; SDAG-GFX1100-FAKE16-LABEL: v_mad_mix_v3f32_clamp_postcvt:
1090
1089
; SDAG-GFX1100-FAKE16: ; %bb.0:
1091
1090
; SDAG-GFX1100-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1092
- ; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1]
1091
+ ; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
1093
1092
; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
1094
1093
; SDAG-GFX1100-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
1095
- ; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
1096
- ; SDAG-GFX1100-FAKE16-NEXT: v_pack_b32_f16 v1, v1, 0
1097
- ; SDAG-GFX1100-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
1098
- ; SDAG-GFX1100-FAKE16-NEXT: v_pk_max_f16 v0, v6, v6 clamp
1094
+ ; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1099
1095
; SDAG-GFX1100-FAKE16-NEXT: v_pk_max_f16 v1, v1, v1 clamp
1096
+ ; SDAG-GFX1100-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
1097
+ ; SDAG-GFX1100-FAKE16-NEXT: v_mov_b32_e32 v0, v6
1100
1098
; SDAG-GFX1100-FAKE16-NEXT: s_setpc_b64 s[30:31]
1101
1099
;
1102
- ; SDAG-GFX900-LABEL: v_mad_mix_v3f32_clamp_postcvt:
1103
- ; SDAG-GFX900: ; %bb.0:
1104
- ; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1105
- ; SDAG-GFX900-NEXT: v_mad_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1]
1106
- ; SDAG-GFX900-NEXT: v_mad_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
1107
- ; SDAG-GFX900-NEXT: v_pack_b32_f16 v1, v1, 0
1108
- ; SDAG-GFX900-NEXT: v_mad_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
1109
- ; SDAG-GFX900-NEXT: v_pk_max_f16 v0, v6, v6 clamp
1110
- ; SDAG-GFX900-NEXT: v_pk_max_f16 v1, v1, v1 clamp
1111
- ; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31]
1100
+ ; GFX900-LABEL: v_mad_mix_v3f32_clamp_postcvt:
1101
+ ; GFX900: ; %bb.0:
1102
+ ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1103
+ ; GFX900-NEXT: v_mad_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
1104
+ ; GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] clamp
1105
+ ; GFX900-NEXT: v_mad_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1106
+ ; GFX900-NEXT: v_pk_max_f16 v1, v1, v1 clamp
1107
+ ; GFX900-NEXT: v_mov_b32_e32 v0, v3
1108
+ ; GFX900-NEXT: s_setpc_b64 s[30:31]
1112
1109
;
1113
- ; SDAG-GFX906-LABEL: v_mad_mix_v3f32_clamp_postcvt:
1114
- ; SDAG-GFX906: ; %bb.0:
1115
- ; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1116
- ; SDAG-GFX906-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1]
1117
- ; SDAG-GFX906-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
1118
- ; SDAG-GFX906-NEXT: v_pack_b32_f16 v1, v1, 0
1119
- ; SDAG-GFX906-NEXT: v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
1120
- ; SDAG-GFX906-NEXT: v_pk_max_f16 v0, v6, v6 clamp
1121
- ; SDAG-GFX906-NEXT: v_pk_max_f16 v1, v1, v1 clamp
1122
- ; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
1110
+ ; GFX906-LABEL: v_mad_mix_v3f32_clamp_postcvt:
1111
+ ; GFX906: ; %bb.0:
1112
+ ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1113
+ ; GFX906-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
1114
+ ; GFX906-NEXT: v_fma_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] clamp
1115
+ ; GFX906-NEXT: v_fma_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1116
+ ; GFX906-NEXT: v_pk_max_f16 v1, v1, v1 clamp
1117
+ ; GFX906-NEXT: v_mov_b32_e32 v0, v3
1118
+ ; GFX906-NEXT: s_setpc_b64 s[30:31]
1123
1119
;
1124
1120
; SDAG-VI-LABEL: v_mad_mix_v3f32_clamp_postcvt:
1125
1121
; SDAG-VI: ; %bb.0:
@@ -1193,26 +1189,6 @@ define <3 x half> @v_mad_mix_v3f32_clamp_postcvt(<3 x half> %src0, <3 x half> %s
1193
1189
; GISEL-GFX1100-NEXT: v_mov_b32_e32 v0, v6
1194
1190
; GISEL-GFX1100-NEXT: s_setpc_b64 s[30:31]
1195
1191
;
1196
- ; GISEL-GFX900-LABEL: v_mad_mix_v3f32_clamp_postcvt:
1197
- ; GISEL-GFX900: ; %bb.0:
1198
- ; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1199
- ; GISEL-GFX900-NEXT: v_mad_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
1200
- ; GISEL-GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] clamp
1201
- ; GISEL-GFX900-NEXT: v_mad_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1202
- ; GISEL-GFX900-NEXT: v_pk_max_f16 v1, v1, v1 clamp
1203
- ; GISEL-GFX900-NEXT: v_mov_b32_e32 v0, v3
1204
- ; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31]
1205
- ;
1206
- ; GISEL-GFX906-LABEL: v_mad_mix_v3f32_clamp_postcvt:
1207
- ; GISEL-GFX906: ; %bb.0:
1208
- ; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1209
- ; GISEL-GFX906-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
1210
- ; GISEL-GFX906-NEXT: v_fma_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] clamp
1211
- ; GISEL-GFX906-NEXT: v_fma_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1212
- ; GISEL-GFX906-NEXT: v_pk_max_f16 v1, v1, v1 clamp
1213
- ; GISEL-GFX906-NEXT: v_mov_b32_e32 v0, v3
1214
- ; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
1215
- ;
1216
1192
; GISEL-VI-LABEL: v_mad_mix_v3f32_clamp_postcvt:
1217
1193
; GISEL-VI: ; %bb.0:
1218
1194
; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
0 commit comments