@@ -2986,14 +2986,14 @@ define amdgpu_kernel void @v_clamp_v2f16_undef_elt(ptr addrspace(1) %out, ptr ad
2986
2986
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
2987
2987
; GFX6-NEXT: s_mov_b64 s[4:5], s[2:3]
2988
2988
; GFX6-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
2989
+ ; GFX6-NEXT: v_mov_b32_e32 v4, 0x7fc00000
2989
2990
; GFX6-NEXT: s_mov_b64 s[2:3], s[6:7]
2990
2991
; GFX6-NEXT: s_waitcnt vmcnt(0)
2991
2992
; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v2
2992
- ; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3
2993
2993
; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2
2994
- ; GFX6-NEXT: v_max_f32_e32 v3, 0 , v3
2995
- ; GFX6-NEXT: v_max_f32_e32 v2, s0 , v2
2996
- ; GFX6-NEXT: v_min_f32_e32 v3, s0, v3
2994
+ ; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3
2995
+ ; GFX6-NEXT: v_max_f32_e32 v2, 0x7fc00000 , v2
2996
+ ; GFX6-NEXT: v_med3_f32 v3, v3, 0, v4
2997
2997
; GFX6-NEXT: v_cvt_f16_f32_e32 v3, v3
2998
2998
; GFX6-NEXT: v_min_f32_e32 v2, 1.0, v2
2999
2999
; GFX6-NEXT: v_cvt_f16_f32_e32 v2, v2
@@ -3006,20 +3006,20 @@ define amdgpu_kernel void @v_clamp_v2f16_undef_elt(ptr addrspace(1) %out, ptr ad
3006
3006
; GFX8: ; %bb.0:
3007
3007
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
3008
3008
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 2, v0
3009
+ ; GFX8-NEXT: v_mov_b32_e32 v4, 0x7e00
3009
3010
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
3010
3011
; GFX8-NEXT: v_mov_b32_e32 v1, s3
3011
3012
; GFX8-NEXT: v_add_u32_e32 v0, vcc, s2, v2
3012
3013
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
3013
3014
; GFX8-NEXT: flat_load_dword v3, v[0:1]
3014
3015
; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v2
3015
- ; GFX8-NEXT: v_mov_b32_e32 v4, s0
3016
3016
; GFX8-NEXT: v_mov_b32_e32 v1, s1
3017
3017
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
3018
3018
; GFX8-NEXT: s_waitcnt vmcnt(0)
3019
3019
; GFX8-NEXT: v_max_f16_sdwa v2, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
3020
3020
; GFX8-NEXT: v_max_f16_e32 v3, v3, v3
3021
3021
; GFX8-NEXT: v_max_f16_e32 v2, 0, v2
3022
- ; GFX8-NEXT: v_max_f16_e32 v3, s0 , v3
3022
+ ; GFX8-NEXT: v_max_f16_e32 v3, 0x7e00 , v3
3023
3023
; GFX8-NEXT: v_min_f16_e32 v3, 1.0, v3
3024
3024
; GFX8-NEXT: v_min_f16_sdwa v2, v2, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
3025
3025
; GFX8-NEXT: v_or_b32_e32 v2, v3, v2
@@ -3747,16 +3747,16 @@ define amdgpu_kernel void @v_clamp_v2f16_undef_limit_elts0(ptr addrspace(1) %out
3747
3747
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
3748
3748
; GFX6-NEXT: s_mov_b64 s[4:5], s[2:3]
3749
3749
; GFX6-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
3750
+ ; GFX6-NEXT: v_mov_b32_e32 v4, 0x7fc00000
3750
3751
; GFX6-NEXT: s_mov_b64 s[2:3], s[6:7]
3751
3752
; GFX6-NEXT: s_waitcnt vmcnt(0)
3752
3753
; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v2
3753
3754
; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3
3754
3755
; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2
3755
- ; GFX6-NEXT: v_max_f32_e32 v3, s0, v3
3756
- ; GFX6-NEXT: v_max_f32_e32 v2, 0, v2
3756
+ ; GFX6-NEXT: v_max_f32_e32 v3, 0x7fc00000, v3
3757
3757
; GFX6-NEXT: v_min_f32_e32 v3, 1.0, v3
3758
3758
; GFX6-NEXT: v_cvt_f16_f32_e32 v3, v3
3759
- ; GFX6-NEXT: v_min_f32_e32 v2, s0, v2
3759
+ ; GFX6-NEXT: v_med3_f32 v2, v2, 0, v4
3760
3760
; GFX6-NEXT: v_cvt_f16_f32_e32 v2, v2
3761
3761
; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3
3762
3762
; GFX6-NEXT: v_or_b32_e32 v2, v2, v3
@@ -3779,9 +3779,9 @@ define amdgpu_kernel void @v_clamp_v2f16_undef_limit_elts0(ptr addrspace(1) %out
3779
3779
; GFX8-NEXT: s_waitcnt vmcnt(0)
3780
3780
; GFX8-NEXT: v_max_f16_sdwa v2, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
3781
3781
; GFX8-NEXT: v_max_f16_e32 v3, v3, v3
3782
- ; GFX8-NEXT: v_max_f16_e32 v2, s0 , v2
3782
+ ; GFX8-NEXT: v_max_f16_e32 v2, 0x7e00 , v2
3783
3783
; GFX8-NEXT: v_max_f16_e32 v3, 0, v3
3784
- ; GFX8-NEXT: v_min_f16_e32 v3, s0 , v3
3784
+ ; GFX8-NEXT: v_min_f16_e32 v3, 0x7e00 , v3
3785
3785
; GFX8-NEXT: v_min_f16_sdwa v2, v2, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
3786
3786
; GFX8-NEXT: v_or_b32_e32 v2, v3, v2
3787
3787
; GFX8-NEXT: flat_store_dword v[0:1], v2
@@ -3845,14 +3845,14 @@ define amdgpu_kernel void @v_clamp_v2f16_undef_limit_elts1(ptr addrspace(1) %out
3845
3845
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
3846
3846
; GFX6-NEXT: s_mov_b64 s[4:5], s[2:3]
3847
3847
; GFX6-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
3848
+ ; GFX6-NEXT: v_mov_b32_e32 v4, 0x7fc00000
3848
3849
; GFX6-NEXT: s_mov_b64 s[2:3], s[6:7]
3849
3850
; GFX6-NEXT: s_waitcnt vmcnt(0)
3850
3851
; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v2
3851
- ; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3
3852
3852
; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2
3853
- ; GFX6-NEXT: v_max_f32_e32 v3, 0 , v3
3854
- ; GFX6-NEXT: v_max_f32_e32 v2, s0 , v2
3855
- ; GFX6-NEXT: v_min_f32_e32 v3, s0, v3
3853
+ ; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3
3854
+ ; GFX6-NEXT: v_max_f32_e32 v2, 0x7fc00000 , v2
3855
+ ; GFX6-NEXT: v_med3_f32 v3, v3, 0, v4
3856
3856
; GFX6-NEXT: v_cvt_f16_f32_e32 v3, v3
3857
3857
; GFX6-NEXT: v_min_f32_e32 v2, 1.0, v2
3858
3858
; GFX6-NEXT: v_cvt_f16_f32_e32 v2, v2
@@ -3865,20 +3865,20 @@ define amdgpu_kernel void @v_clamp_v2f16_undef_limit_elts1(ptr addrspace(1) %out
3865
3865
; GFX8: ; %bb.0:
3866
3866
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
3867
3867
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 2, v0
3868
+ ; GFX8-NEXT: v_mov_b32_e32 v4, 0x7e00
3868
3869
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
3869
3870
; GFX8-NEXT: v_mov_b32_e32 v1, s3
3870
3871
; GFX8-NEXT: v_add_u32_e32 v0, vcc, s2, v2
3871
3872
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
3872
3873
; GFX8-NEXT: flat_load_dword v3, v[0:1]
3873
3874
; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v2
3874
- ; GFX8-NEXT: v_mov_b32_e32 v4, s0
3875
3875
; GFX8-NEXT: v_mov_b32_e32 v1, s1
3876
3876
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
3877
3877
; GFX8-NEXT: s_waitcnt vmcnt(0)
3878
3878
; GFX8-NEXT: v_max_f16_sdwa v2, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
3879
3879
; GFX8-NEXT: v_max_f16_e32 v3, v3, v3
3880
3880
; GFX8-NEXT: v_max_f16_e32 v2, 0, v2
3881
- ; GFX8-NEXT: v_max_f16_e32 v3, s0 , v3
3881
+ ; GFX8-NEXT: v_max_f16_e32 v3, 0x7e00 , v3
3882
3882
; GFX8-NEXT: v_min_f16_e32 v3, 1.0, v3
3883
3883
; GFX8-NEXT: v_min_f16_sdwa v2, v2, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
3884
3884
; GFX8-NEXT: v_or_b32_e32 v2, v3, v2
0 commit comments