Skip to content

[X86] Add missing scalar-load pattern for (V)CVTSS2SD blending with v2f64 pass through value #126955

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from

Conversation

RKSimon
Copy link
Collaborator

@RKSimon RKSimon commented Feb 12, 2025

No description provided.

@llvmbot
Copy link
Member

llvmbot commented Feb 12, 2025

@llvm/pr-subscribers-backend-x86

Author: Simon Pilgrim (RKSimon)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/126955.diff

3 Files Affected:

  • (modified) llvm/lib/Target/X86/X86InstrAVX512.td (+6)
  • (modified) llvm/lib/Target/X86/X86InstrSSE.td (+10)
  • (modified) llvm/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll (+12-48)
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 9d8c123185a7c..f8e1553d07039 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -7777,6 +7777,12 @@ def : Pat<(v2f64 (X86Movsd
           (VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>,
           Requires<[HasAVX512]>;
 
+def : Pat<(v2f64 (X86Movsd
+                   (v2f64 VR128X:$dst),
+                   (v2f64 (scalar_to_vector (f64 (any_fpextend (loadf32 addr:$src))))))),
+          (VCVTSS2SDZrm_Int VR128X:$dst, ssmem:$src)>,
+          Requires<[HasAVX512]>;
+
 //===----------------------------------------------------------------------===//
 // AVX-512  Vector convert from signed/unsigned integer to float/double
 //          and from float/double to signed/unsigned integer
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 6aadb788c851e..71ca383d59767 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -1425,6 +1425,11 @@ def : Pat<(v2f64 (X86Movsd
                      (f64 (any_fpextend (f32 (extractelt VR128:$src, (iPTR 0))))))))),
           (VCVTSS2SDrr_Int VR128:$dst, VR128:$src)>;
 
+def : Pat<(v2f64 (X86Movsd
+                   (v2f64 VR128:$dst),
+                   (v2f64 (scalar_to_vector (f64 (any_fpextend (loadf32 addr:$src))))))),
+          (VCVTSS2SDrm_Int VR128:$dst, ssmem:$src)>;
+
 def : Pat<(v4f32 (X86Movss
                    (v4f32 VR128:$dst),
                    (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR64:$src)))))),
@@ -1479,6 +1484,11 @@ def : Pat<(v2f64 (X86Movsd
                      (f64 (any_fpextend (f32 (extractelt VR128:$src, (iPTR 0))))))))),
           (CVTSS2SDrr_Int VR128:$dst, VR128:$src)>;
 
+def : Pat<(v2f64 (X86Movsd
+                   (v2f64 VR128:$dst),
+                   (v2f64 (scalar_to_vector (f64 (any_fpextend (loadf32 addr:$src))))))),
+          (CVTSS2SDrm_Int VR128:$dst, ssmem:$src)>;
+
 def : Pat<(v2f64 (X86Movsd
                    (v2f64 VR128:$dst),
                    (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR64:$src)))))),
diff --git a/llvm/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll b/llvm/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll
index f6b0df153c260..0f4b1d915e78f 100644
--- a/llvm/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll
+++ b/llvm/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll
@@ -711,58 +711,34 @@ define <2 x double> @test_x86_sse2_cvtss2sd_load(<2 x double> %a0, ptr %p1) {
 ; X86-SSE-LABEL: test_x86_sse2_cvtss2sd_load:
 ; X86-SSE:       ## %bb.0:
 ; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
-; X86-SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X86-SSE-NEXT:    ## encoding: [0xf3,0x0f,0x10,0x08]
-; X86-SSE-NEXT:    cvtss2sd %xmm1, %xmm1 ## encoding: [0xf3,0x0f,0x5a,0xc9]
-; X86-SSE-NEXT:    movsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x10,0xc1]
-; X86-SSE-NEXT:    ## xmm0 = xmm1[0],xmm0[1]
+; X86-SSE-NEXT:    cvtss2sd (%eax), %xmm0 ## encoding: [0xf3,0x0f,0x5a,0x00]
 ; X86-SSE-NEXT:    retl ## encoding: [0xc3]
 ;
 ; X86-AVX1-LABEL: test_x86_sse2_cvtss2sd_load:
 ; X86-AVX1:       ## %bb.0:
 ; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
-; X86-AVX1-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X86-AVX1-NEXT:    ## encoding: [0xc5,0xfa,0x10,0x08]
-; X86-AVX1-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf2,0x5a,0xc9]
-; X86-AVX1-NEXT:    vblendps $3, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x03]
-; X86-AVX1-NEXT:    ## xmm0 = xmm1[0,1],xmm0[2,3]
+; X86-AVX1-NEXT:    vcvtss2sd (%eax), %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x5a,0x00]
 ; X86-AVX1-NEXT:    retl ## encoding: [0xc3]
 ;
 ; X86-AVX512-LABEL: test_x86_sse2_cvtss2sd_load:
 ; X86-AVX512:       ## %bb.0:
 ; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
-; X86-AVX512-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X86-AVX512-NEXT:    ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x08]
-; X86-AVX512-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf2,0x5a,0xc9]
-; X86-AVX512-NEXT:    vblendps $3, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x03]
-; X86-AVX512-NEXT:    ## xmm0 = xmm1[0,1],xmm0[2,3]
+; X86-AVX512-NEXT:    vcvtss2sd (%eax), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5a,0x00]
 ; X86-AVX512-NEXT:    retl ## encoding: [0xc3]
 ;
 ; X64-SSE-LABEL: test_x86_sse2_cvtss2sd_load:
 ; X64-SSE:       ## %bb.0:
-; X64-SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X64-SSE-NEXT:    ## encoding: [0xf3,0x0f,0x10,0x0f]
-; X64-SSE-NEXT:    cvtss2sd %xmm1, %xmm1 ## encoding: [0xf3,0x0f,0x5a,0xc9]
-; X64-SSE-NEXT:    movsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x10,0xc1]
-; X64-SSE-NEXT:    ## xmm0 = xmm1[0],xmm0[1]
+; X64-SSE-NEXT:    cvtss2sd (%rdi), %xmm0 ## encoding: [0xf3,0x0f,0x5a,0x07]
 ; X64-SSE-NEXT:    retq ## encoding: [0xc3]
 ;
 ; X64-AVX1-LABEL: test_x86_sse2_cvtss2sd_load:
 ; X64-AVX1:       ## %bb.0:
-; X64-AVX1-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X64-AVX1-NEXT:    ## encoding: [0xc5,0xfa,0x10,0x0f]
-; X64-AVX1-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf2,0x5a,0xc9]
-; X64-AVX1-NEXT:    vblendps $3, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x03]
-; X64-AVX1-NEXT:    ## xmm0 = xmm1[0,1],xmm0[2,3]
+; X64-AVX1-NEXT:    vcvtss2sd (%rdi), %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x5a,0x07]
 ; X64-AVX1-NEXT:    retq ## encoding: [0xc3]
 ;
 ; X64-AVX512-LABEL: test_x86_sse2_cvtss2sd_load:
 ; X64-AVX512:       ## %bb.0:
-; X64-AVX512-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X64-AVX512-NEXT:    ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x0f]
-; X64-AVX512-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf2,0x5a,0xc9]
-; X64-AVX512-NEXT:    vblendps $3, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x03]
-; X64-AVX512-NEXT:    ## xmm0 = xmm1[0,1],xmm0[2,3]
+; X64-AVX512-NEXT:    vcvtss2sd (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5a,0x07]
 ; X64-AVX512-NEXT:    retq ## encoding: [0xc3]
   %a1 = load <4 x float>, ptr %p1
   %res = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1) ; <<2 x double>> [#uses=1]
@@ -774,46 +750,34 @@ define <2 x double> @test_x86_sse2_cvtss2sd_load_optsize(<2 x double> %a0, ptr %
 ; X86-SSE-LABEL: test_x86_sse2_cvtss2sd_load_optsize:
 ; X86-SSE:       ## %bb.0:
 ; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
-; X86-SSE-NEXT:    cvtss2sd (%eax), %xmm1 ## encoding: [0xf3,0x0f,0x5a,0x08]
-; X86-SSE-NEXT:    movsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x10,0xc1]
-; X86-SSE-NEXT:    ## xmm0 = xmm1[0],xmm0[1]
+; X86-SSE-NEXT:    cvtss2sd (%eax), %xmm0 ## encoding: [0xf3,0x0f,0x5a,0x00]
 ; X86-SSE-NEXT:    retl ## encoding: [0xc3]
 ;
 ; X86-AVX1-LABEL: test_x86_sse2_cvtss2sd_load_optsize:
 ; X86-AVX1:       ## %bb.0:
 ; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
-; X86-AVX1-NEXT:    vcvtss2sd (%eax), %xmm1, %xmm1 ## encoding: [0xc5,0xf2,0x5a,0x08]
-; X86-AVX1-NEXT:    vmovsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x10,0xc1]
-; X86-AVX1-NEXT:    ## xmm0 = xmm1[0],xmm0[1]
+; X86-AVX1-NEXT:    vcvtss2sd (%eax), %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x5a,0x00]
 ; X86-AVX1-NEXT:    retl ## encoding: [0xc3]
 ;
 ; X86-AVX512-LABEL: test_x86_sse2_cvtss2sd_load_optsize:
 ; X86-AVX512:       ## %bb.0:
 ; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
-; X86-AVX512-NEXT:    vcvtss2sd (%eax), %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf2,0x5a,0x08]
-; X86-AVX512-NEXT:    vmovsd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0xc1]
-; X86-AVX512-NEXT:    ## xmm0 = xmm1[0],xmm0[1]
+; X86-AVX512-NEXT:    vcvtss2sd (%eax), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5a,0x00]
 ; X86-AVX512-NEXT:    retl ## encoding: [0xc3]
 ;
 ; X64-SSE-LABEL: test_x86_sse2_cvtss2sd_load_optsize:
 ; X64-SSE:       ## %bb.0:
-; X64-SSE-NEXT:    cvtss2sd (%rdi), %xmm1 ## encoding: [0xf3,0x0f,0x5a,0x0f]
-; X64-SSE-NEXT:    movsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x10,0xc1]
-; X64-SSE-NEXT:    ## xmm0 = xmm1[0],xmm0[1]
+; X64-SSE-NEXT:    cvtss2sd (%rdi), %xmm0 ## encoding: [0xf3,0x0f,0x5a,0x07]
 ; X64-SSE-NEXT:    retq ## encoding: [0xc3]
 ;
 ; X64-AVX1-LABEL: test_x86_sse2_cvtss2sd_load_optsize:
 ; X64-AVX1:       ## %bb.0:
-; X64-AVX1-NEXT:    vcvtss2sd (%rdi), %xmm1, %xmm1 ## encoding: [0xc5,0xf2,0x5a,0x0f]
-; X64-AVX1-NEXT:    vmovsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x10,0xc1]
-; X64-AVX1-NEXT:    ## xmm0 = xmm1[0],xmm0[1]
+; X64-AVX1-NEXT:    vcvtss2sd (%rdi), %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x5a,0x07]
 ; X64-AVX1-NEXT:    retq ## encoding: [0xc3]
 ;
 ; X64-AVX512-LABEL: test_x86_sse2_cvtss2sd_load_optsize:
 ; X64-AVX512:       ## %bb.0:
-; X64-AVX512-NEXT:    vcvtss2sd (%rdi), %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf2,0x5a,0x0f]
-; X64-AVX512-NEXT:    vmovsd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0xc1]
-; X64-AVX512-NEXT:    ## xmm0 = xmm1[0],xmm0[1]
+; X64-AVX512-NEXT:    vcvtss2sd (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5a,0x07]
 ; X64-AVX512-NEXT:    retq ## encoding: [0xc3]
   %a1 = load <4 x float>, ptr %p1
   %res = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1) ; <<2 x double>> [#uses=1]

; X86-SSE-NEXT: cvtss2sd %xmm1, %xmm1 ## encoding: [0xf3,0x0f,0x5a,0xc9]
; X86-SSE-NEXT: movsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x10,0xc1]
; X86-SSE-NEXT: ## xmm0 = xmm1[0],xmm0[1]
; X86-SSE-NEXT: cvtss2sd (%eax), %xmm0 ## encoding: [0xf3,0x0f,0x5a,0x00]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the problem only exists in the upgrade code. Considering the upgrade has been done since LLVM 7.0, there may be no actual requirement for this change?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Many of the patterns seem to be failing with generic code, it looks like those are the only test coverage we have:
https://gcc.godbolt.org/z/MhaoPsWdM

I'll improve the tests first so we don't rely on the *-upgrade.ll test files

(This came up while looking at ways to cleanup x86 regressions on #122671)

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, that should have been: https://gcc.godbolt.org/z/zj3naMGfj

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants