[X86] Add missing scalar-load pattern for (V)CVTSS2SD blending with v2f64 pass through value #126955
Conversation
@llvm/pr-subscribers-backend-x86

Author: Simon Pilgrim (RKSimon)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/126955.diff (3 files affected)
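As context, a reduced sketch of the IR shape this lowers (taken from the tests updated below; the function name is illustrative): convert the low f32 element of a loaded vector to f64 and blend it into the low lane of the v2f64 pass-through value.

declare <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double>, <4 x float>)

define <2 x double> @cvtss2sd_load_sketch(<2 x double> %a0, ptr %p1) {
  ; Only element 0 of %a1 is used, so the vector load narrows to a
  ; scalar f32 load that the new patterns fold into (v)cvtss2sd.
  %a1 = load <4 x float>, ptr %p1
  ; Low lane: f64 extension of %a1[0]; upper lane: %a0[1] preserved.
  %res = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1)
  ret <2 x double> %res
}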
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 9d8c123185a7c..f8e1553d07039 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -7777,6 +7777,12 @@ def : Pat<(v2f64 (X86Movsd
(VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>,
Requires<[HasAVX512]>;
+def : Pat<(v2f64 (X86Movsd
+ (v2f64 VR128X:$dst),
+ (v2f64 (scalar_to_vector (f64 (any_fpextend (loadf32 addr:$src))))))),
+ (VCVTSS2SDZrm_Int VR128X:$dst, ssmem:$src)>,
+ Requires<[HasAVX512]>;
+
//===----------------------------------------------------------------------===//
// AVX-512 Vector convert from signed/unsigned integer to float/double
// and from float/double to signed/unsigned integer
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 6aadb788c851e..71ca383d59767 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -1425,6 +1425,11 @@ def : Pat<(v2f64 (X86Movsd
(f64 (any_fpextend (f32 (extractelt VR128:$src, (iPTR 0))))))))),
(VCVTSS2SDrr_Int VR128:$dst, VR128:$src)>;
+def : Pat<(v2f64 (X86Movsd
+ (v2f64 VR128:$dst),
+ (v2f64 (scalar_to_vector (f64 (any_fpextend (loadf32 addr:$src))))))),
+ (VCVTSS2SDrm_Int VR128:$dst, ssmem:$src)>;
+
def : Pat<(v4f32 (X86Movss
(v4f32 VR128:$dst),
(v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR64:$src)))))),
@@ -1479,6 +1484,11 @@ def : Pat<(v2f64 (X86Movsd
(f64 (any_fpextend (f32 (extractelt VR128:$src, (iPTR 0))))))))),
(CVTSS2SDrr_Int VR128:$dst, VR128:$src)>;
+def : Pat<(v2f64 (X86Movsd
+ (v2f64 VR128:$dst),
+ (v2f64 (scalar_to_vector (f64 (any_fpextend (loadf32 addr:$src))))))),
+ (CVTSS2SDrm_Int VR128:$dst, ssmem:$src)>;
+
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128:$dst),
(v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR64:$src)))))),
diff --git a/llvm/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll b/llvm/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll
index f6b0df153c260..0f4b1d915e78f 100644
--- a/llvm/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll
+++ b/llvm/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll
@@ -711,58 +711,34 @@ define <2 x double> @test_x86_sse2_cvtss2sd_load(<2 x double> %a0, ptr %p1) {
; X86-SSE-LABEL: test_x86_sse2_cvtss2sd_load:
; X86-SSE: ## %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
-; X86-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X86-SSE-NEXT: ## encoding: [0xf3,0x0f,0x10,0x08]
-; X86-SSE-NEXT: cvtss2sd %xmm1, %xmm1 ## encoding: [0xf3,0x0f,0x5a,0xc9]
-; X86-SSE-NEXT: movsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x10,0xc1]
-; X86-SSE-NEXT: ## xmm0 = xmm1[0],xmm0[1]
+; X86-SSE-NEXT: cvtss2sd (%eax), %xmm0 ## encoding: [0xf3,0x0f,0x5a,0x00]
; X86-SSE-NEXT: retl ## encoding: [0xc3]
;
; X86-AVX1-LABEL: test_x86_sse2_cvtss2sd_load:
; X86-AVX1: ## %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
-; X86-AVX1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X86-AVX1-NEXT: ## encoding: [0xc5,0xfa,0x10,0x08]
-; X86-AVX1-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf2,0x5a,0xc9]
-; X86-AVX1-NEXT: vblendps $3, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x03]
-; X86-AVX1-NEXT: ## xmm0 = xmm1[0,1],xmm0[2,3]
+; X86-AVX1-NEXT: vcvtss2sd (%eax), %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x5a,0x00]
; X86-AVX1-NEXT: retl ## encoding: [0xc3]
;
; X86-AVX512-LABEL: test_x86_sse2_cvtss2sd_load:
; X86-AVX512: ## %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
-; X86-AVX512-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X86-AVX512-NEXT: ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x08]
-; X86-AVX512-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf2,0x5a,0xc9]
-; X86-AVX512-NEXT: vblendps $3, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x03]
-; X86-AVX512-NEXT: ## xmm0 = xmm1[0,1],xmm0[2,3]
+; X86-AVX512-NEXT: vcvtss2sd (%eax), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5a,0x00]
; X86-AVX512-NEXT: retl ## encoding: [0xc3]
;
; X64-SSE-LABEL: test_x86_sse2_cvtss2sd_load:
; X64-SSE: ## %bb.0:
-; X64-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X64-SSE-NEXT: ## encoding: [0xf3,0x0f,0x10,0x0f]
-; X64-SSE-NEXT: cvtss2sd %xmm1, %xmm1 ## encoding: [0xf3,0x0f,0x5a,0xc9]
-; X64-SSE-NEXT: movsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x10,0xc1]
-; X64-SSE-NEXT: ## xmm0 = xmm1[0],xmm0[1]
+; X64-SSE-NEXT: cvtss2sd (%rdi), %xmm0 ## encoding: [0xf3,0x0f,0x5a,0x07]
; X64-SSE-NEXT: retq ## encoding: [0xc3]
;
; X64-AVX1-LABEL: test_x86_sse2_cvtss2sd_load:
; X64-AVX1: ## %bb.0:
-; X64-AVX1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X64-AVX1-NEXT: ## encoding: [0xc5,0xfa,0x10,0x0f]
-; X64-AVX1-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf2,0x5a,0xc9]
-; X64-AVX1-NEXT: vblendps $3, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x03]
-; X64-AVX1-NEXT: ## xmm0 = xmm1[0,1],xmm0[2,3]
+; X64-AVX1-NEXT: vcvtss2sd (%rdi), %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x5a,0x07]
; X64-AVX1-NEXT: retq ## encoding: [0xc3]
;
; X64-AVX512-LABEL: test_x86_sse2_cvtss2sd_load:
; X64-AVX512: ## %bb.0:
-; X64-AVX512-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X64-AVX512-NEXT: ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x0f]
-; X64-AVX512-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf2,0x5a,0xc9]
-; X64-AVX512-NEXT: vblendps $3, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x03]
-; X64-AVX512-NEXT: ## xmm0 = xmm1[0,1],xmm0[2,3]
+; X64-AVX512-NEXT: vcvtss2sd (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5a,0x07]
; X64-AVX512-NEXT: retq ## encoding: [0xc3]
%a1 = load <4 x float>, ptr %p1
%res = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1) ; <<2 x double>> [#uses=1]
@@ -774,46 +750,34 @@ define <2 x double> @test_x86_sse2_cvtss2sd_load_optsize(<2 x double> %a0, ptr %p1) {
; X86-SSE-LABEL: test_x86_sse2_cvtss2sd_load_optsize:
; X86-SSE: ## %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
-; X86-SSE-NEXT: cvtss2sd (%eax), %xmm1 ## encoding: [0xf3,0x0f,0x5a,0x08]
-; X86-SSE-NEXT: movsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x10,0xc1]
-; X86-SSE-NEXT: ## xmm0 = xmm1[0],xmm0[1]
+; X86-SSE-NEXT: cvtss2sd (%eax), %xmm0 ## encoding: [0xf3,0x0f,0x5a,0x00]
; X86-SSE-NEXT: retl ## encoding: [0xc3]
;
; X86-AVX1-LABEL: test_x86_sse2_cvtss2sd_load_optsize:
; X86-AVX1: ## %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
-; X86-AVX1-NEXT: vcvtss2sd (%eax), %xmm1, %xmm1 ## encoding: [0xc5,0xf2,0x5a,0x08]
-; X86-AVX1-NEXT: vmovsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x10,0xc1]
-; X86-AVX1-NEXT: ## xmm0 = xmm1[0],xmm0[1]
+; X86-AVX1-NEXT: vcvtss2sd (%eax), %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x5a,0x00]
; X86-AVX1-NEXT: retl ## encoding: [0xc3]
;
; X86-AVX512-LABEL: test_x86_sse2_cvtss2sd_load_optsize:
; X86-AVX512: ## %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
-; X86-AVX512-NEXT: vcvtss2sd (%eax), %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf2,0x5a,0x08]
-; X86-AVX512-NEXT: vmovsd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0xc1]
-; X86-AVX512-NEXT: ## xmm0 = xmm1[0],xmm0[1]
+; X86-AVX512-NEXT: vcvtss2sd (%eax), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5a,0x00]
; X86-AVX512-NEXT: retl ## encoding: [0xc3]
;
; X64-SSE-LABEL: test_x86_sse2_cvtss2sd_load_optsize:
; X64-SSE: ## %bb.0:
-; X64-SSE-NEXT: cvtss2sd (%rdi), %xmm1 ## encoding: [0xf3,0x0f,0x5a,0x0f]
-; X64-SSE-NEXT: movsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x10,0xc1]
-; X64-SSE-NEXT: ## xmm0 = xmm1[0],xmm0[1]
+; X64-SSE-NEXT: cvtss2sd (%rdi), %xmm0 ## encoding: [0xf3,0x0f,0x5a,0x07]
; X64-SSE-NEXT: retq ## encoding: [0xc3]
;
; X64-AVX1-LABEL: test_x86_sse2_cvtss2sd_load_optsize:
; X64-AVX1: ## %bb.0:
-; X64-AVX1-NEXT: vcvtss2sd (%rdi), %xmm1, %xmm1 ## encoding: [0xc5,0xf2,0x5a,0x0f]
-; X64-AVX1-NEXT: vmovsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x10,0xc1]
-; X64-AVX1-NEXT: ## xmm0 = xmm1[0],xmm0[1]
+; X64-AVX1-NEXT: vcvtss2sd (%rdi), %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x5a,0x07]
; X64-AVX1-NEXT: retq ## encoding: [0xc3]
;
; X64-AVX512-LABEL: test_x86_sse2_cvtss2sd_load_optsize:
; X64-AVX512: ## %bb.0:
-; X64-AVX512-NEXT: vcvtss2sd (%rdi), %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf2,0x5a,0x0f]
-; X64-AVX512-NEXT: vmovsd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0xc1]
-; X64-AVX512-NEXT: ## xmm0 = xmm1[0],xmm0[1]
+; X64-AVX512-NEXT: vcvtss2sd (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5a,0x07]
; X64-AVX512-NEXT: retq ## encoding: [0xc3]
%a1 = load <4 x float>, ptr %p1
%res = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1) ; <<2 x double>> [#uses=1]
-; X86-SSE-NEXT: cvtss2sd %xmm1, %xmm1 ## encoding: [0xf3,0x0f,0x5a,0xc9]
-; X86-SSE-NEXT: movsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x10,0xc1]
-; X86-SSE-NEXT: ## xmm0 = xmm1[0],xmm0[1]
+; X86-SSE-NEXT: cvtss2sd (%eax), %xmm0 ## encoding: [0xf3,0x0f,0x5a,0x00]
I think the problem only exists in the upgrade code. Considering the upgrade has been in place since LLVM 7.0, maybe there is no actual requirement for this change?
Many of the patterns seem to be failing with generic code, and it looks like these are the only test coverage we have:
https://gcc.godbolt.org/z/MhaoPsWdM
I'll improve the tests first so we don't rely on the *-upgrade.ll test files.
(This came up while looking at ways to clean up x86 regressions on #122671.)
Sorry, that should have been: https://gcc.godbolt.org/z/zj3naMGfj
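For reference, a minimal non-intrinsic sketch (names illustrative) of the generic IR shape the new patterns are intended to match:

define <2 x double> @cvtss2sd_generic_sketch(<2 x double> %a0, ptr %p) {
  %f = load float, ptr %p
  ; fpext of a loaded f32, inserted into the low f64 lane with the
  ; upper lane of %a0 preserved. This selects as
  ; X86Movsd($dst, scalar_to_vector(any_fpextend(loadf32))), which the
  ; added patterns now fold to a memory-operand (v)cvtss2sd.
  %d = fpext float %f to double
  %res = insertelement <2 x double> %a0, double %d, i64 0
  ret <2 x double> %res
}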