|
19 | 19 | ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmla_indexed_scalar_4s_strict |
20 | 20 | ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmla_indexed_scalar_2d_strict |
21 | 21 | ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmulh_lane_1s |
22 | | -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vqdmulls_lane_s32 |
23 | | -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmlal_lane_1d |
24 | | -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmlsl_lane_1d |
25 | 22 | ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for scalar_fmls_from_extract_v4f32 |
26 | 23 | ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for scalar_fmls_from_extract_v2f32 |
27 | 24 | ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for scalar_fmls_from_extract_v2f64 |
|
30 | 27 | ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v4f32 |
31 | 28 | ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v4f32_1 |
32 | 29 | ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v2f64 |
33 | | -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmlal_d |
34 | | -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmlsl_d |
35 | 30 |
|
36 | 31 | define <8 x i16> @smull8h(ptr %A, ptr %B) nounwind { |
37 | 32 | ; CHECK-LABEL: smull8h: |
@@ -1794,52 +1789,106 @@ define i32 @sqsub_lane1_sqdmull4s(i32 %A, <4 x i16> %B, <4 x i16> %C) nounwind { |
1794 | 1789 | } |
1795 | 1790 |
|
1796 | 1791 | define i64 @test_vqdmulls_lane_s32(i32 noundef %a, <2 x i32> noundef %b) { |
1797 | | -; CHECK-LABEL: test_vqdmulls_lane_s32: |
1798 | | -; CHECK: // %bb.0: // %entry |
1799 | | -; CHECK-NEXT: fmov s1, w0 |
1800 | | -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 |
1801 | | -; CHECK-NEXT: sqdmull d0, s1, v0.s[1] |
1802 | | -; CHECK-NEXT: fmov x0, d0 |
1803 | | -; CHECK-NEXT: ret |
| 1792 | +; CHECK-SD-LABEL: test_vqdmulls_lane_s32: |
| 1793 | +; CHECK-SD: // %bb.0: // %entry |
| 1794 | +; CHECK-SD-NEXT: fmov s1, w0 |
| 1795 | +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 |
| 1796 | +; CHECK-SD-NEXT: sqdmull d0, s1, v0.s[1] |
| 1797 | +; CHECK-SD-NEXT: fmov x0, d0 |
| 1798 | +; CHECK-SD-NEXT: ret |
| 1799 | +; |
| 1800 | +; CHECK-GI-LABEL: test_vqdmulls_lane_s32: |
| 1801 | +; CHECK-GI: // %bb.0: // %entry |
| 1802 | +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 |
| 1803 | +; CHECK-GI-NEXT: fmov s1, w0 |
| 1804 | +; CHECK-GI-NEXT: mov s0, v0.s[1] |
| 1805 | +; CHECK-GI-NEXT: sqdmull d0, s1, s0 |
| 1806 | +; CHECK-GI-NEXT: fmov x0, d0 |
| 1807 | +; CHECK-GI-NEXT: ret |
1804 | 1808 | entry: |
1805 | 1809 | %vget_lane = extractelement <2 x i32> %b, i64 1 |
1806 | 1810 | %vqdmulls_s32.i = tail call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %a, i32 %vget_lane) |
1807 | 1811 | ret i64 %vqdmulls_s32.i |
1808 | 1812 | } |
1809 | 1813 |
|
1810 | | -define i64 @sqdmlal_lane_1d(i64 %A, i32 %B, <2 x i32> %C) nounwind { |
1811 | | -; CHECK-LABEL: sqdmlal_lane_1d: |
| 1814 | +define i64 @sqdmlal_lane_1d_v2i32(i64 %A, i32 %B, <2 x i32> %C) nounwind { |
| 1815 | +; CHECK-SD-LABEL: sqdmlal_lane_1d_v2i32: |
| 1816 | +; CHECK-SD: // %bb.0: |
| 1817 | +; CHECK-SD-NEXT: fmov s1, w1 |
| 1818 | +; CHECK-SD-NEXT: fmov d2, x0 |
| 1819 | +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 |
| 1820 | +; CHECK-SD-NEXT: sqdmlal d2, s1, v0.s[1] |
| 1821 | +; CHECK-SD-NEXT: fmov x0, d2 |
| 1822 | +; CHECK-SD-NEXT: ret |
| 1823 | +; |
| 1824 | +; CHECK-GI-LABEL: sqdmlal_lane_1d_v2i32: |
| 1825 | +; CHECK-GI: // %bb.0: |
| 1826 | +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 |
| 1827 | +; CHECK-GI-NEXT: fmov s1, w1 |
| 1828 | +; CHECK-GI-NEXT: fmov d2, x0 |
| 1829 | +; CHECK-GI-NEXT: mov s0, v0.s[1] |
| 1830 | +; CHECK-GI-NEXT: sqdmlal d2, s1, s0 |
| 1831 | +; CHECK-GI-NEXT: fmov x0, d2 |
| 1832 | +; CHECK-GI-NEXT: ret |
| 1833 | + %rhs = extractelement <2 x i32> %C, i32 1 |
| 1834 | + %prod = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %B, i32 %rhs) |
| 1835 | + %res = call i64 @llvm.aarch64.neon.sqadd.i64(i64 %A, i64 %prod) |
| 1836 | + ret i64 %res |
| 1837 | +} |
| 1838 | + |
| 1839 | +define i64 @sqdmlsl_lane_1d_v2i32(i64 %A, i32 %B, <2 x i32> %C) nounwind { |
| 1840 | +; CHECK-SD-LABEL: sqdmlsl_lane_1d_v2i32: |
| 1841 | +; CHECK-SD: // %bb.0: |
| 1842 | +; CHECK-SD-NEXT: fmov s1, w1 |
| 1843 | +; CHECK-SD-NEXT: fmov d2, x0 |
| 1844 | +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 |
| 1845 | +; CHECK-SD-NEXT: sqdmlsl d2, s1, v0.s[1] |
| 1846 | +; CHECK-SD-NEXT: fmov x0, d2 |
| 1847 | +; CHECK-SD-NEXT: ret |
| 1848 | +; |
| 1849 | +; CHECK-GI-LABEL: sqdmlsl_lane_1d_v2i32: |
| 1850 | +; CHECK-GI: // %bb.0: |
| 1851 | +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 |
| 1852 | +; CHECK-GI-NEXT: fmov s1, w1 |
| 1853 | +; CHECK-GI-NEXT: fmov d2, x0 |
| 1854 | +; CHECK-GI-NEXT: mov s0, v0.s[1] |
| 1855 | +; CHECK-GI-NEXT: sqdmlsl d2, s1, s0 |
| 1856 | +; CHECK-GI-NEXT: fmov x0, d2 |
| 1857 | +; CHECK-GI-NEXT: ret |
| 1858 | + %rhs = extractelement <2 x i32> %C, i32 1 |
| 1859 | + %prod = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %B, i32 %rhs) |
| 1860 | + %res = call i64 @llvm.aarch64.neon.sqsub.i64(i64 %A, i64 %prod) |
| 1861 | + ret i64 %res |
| 1862 | +} |
| 1863 | +declare i64 @llvm.aarch64.neon.sqsub.i64(i64, i64) |
| 1864 | + |
| 1865 | +define i64 @sqdmlal_lane_1d_v4i32(i64 %A, i32 %B, <4 x i32> %C) nounwind { |
| 1866 | +; CHECK-LABEL: sqdmlal_lane_1d_v4i32: |
1812 | 1867 | ; CHECK: // %bb.0: |
1813 | 1868 | ; CHECK-NEXT: fmov s1, w1 |
1814 | 1869 | ; CHECK-NEXT: fmov d2, x0 |
1815 | | -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 |
1816 | 1870 | ; CHECK-NEXT: sqdmlal d2, s1, v0.s[1] |
1817 | 1871 | ; CHECK-NEXT: fmov x0, d2 |
1818 | 1872 | ; CHECK-NEXT: ret |
1819 | | - %rhs = extractelement <2 x i32> %C, i32 1 |
| 1873 | + %rhs = extractelement <4 x i32> %C, i32 1 |
1820 | 1874 | %prod = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %B, i32 %rhs) |
1821 | 1875 | %res = call i64 @llvm.aarch64.neon.sqadd.i64(i64 %A, i64 %prod) |
1822 | 1876 | ret i64 %res |
1823 | 1877 | } |
1824 | | -declare i64 @llvm.aarch64.neon.sqdmulls.scalar(i32, i32) |
1825 | | -declare i64 @llvm.aarch64.neon.sqadd.i64(i64, i64) |
1826 | 1878 |
|
1827 | | -define i64 @sqdmlsl_lane_1d(i64 %A, i32 %B, <2 x i32> %C) nounwind { |
1828 | | -; CHECK-LABEL: sqdmlsl_lane_1d: |
| 1879 | +define i64 @sqdmlsl_lane_1d_v4i32(i64 %A, i32 %B, <4 x i32> %C) nounwind { |
| 1880 | +; CHECK-LABEL: sqdmlsl_lane_1d_v4i32: |
1829 | 1881 | ; CHECK: // %bb.0: |
1830 | 1882 | ; CHECK-NEXT: fmov s1, w1 |
1831 | 1883 | ; CHECK-NEXT: fmov d2, x0 |
1832 | | -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 |
1833 | 1884 | ; CHECK-NEXT: sqdmlsl d2, s1, v0.s[1] |
1834 | 1885 | ; CHECK-NEXT: fmov x0, d2 |
1835 | 1886 | ; CHECK-NEXT: ret |
1836 | | - %rhs = extractelement <2 x i32> %C, i32 1 |
| 1887 | + %rhs = extractelement <4 x i32> %C, i32 1 |
1837 | 1888 | %prod = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %B, i32 %rhs) |
1838 | 1889 | %res = call i64 @llvm.aarch64.neon.sqsub.i64(i64 %A, i64 %prod) |
1839 | 1890 | ret i64 %res |
1840 | 1891 | } |
1841 | | -declare i64 @llvm.aarch64.neon.sqsub.i64(i64, i64) |
1842 | | - |
1843 | 1892 |
|
1844 | 1893 | define <4 x i32> @umlal_lane_4s(<4 x i16> %A, <4 x i16> %B, <4 x i32> %C) nounwind { |
1845 | 1894 | ; CHECK-LABEL: umlal_lane_4s: |
@@ -3216,14 +3265,23 @@ define i32 @sqdmlal_s(i16 %A, i16 %B, i32 %C) nounwind { |
3216 | 3265 | } |
3217 | 3266 |
|
3218 | 3267 | define i64 @sqdmlal_d(i32 %A, i32 %B, i64 %C) nounwind { |
3219 | | -; CHECK-LABEL: sqdmlal_d: |
3220 | | -; CHECK: // %bb.0: |
3221 | | -; CHECK-NEXT: fmov s0, w1 |
3222 | | -; CHECK-NEXT: fmov s1, w0 |
3223 | | -; CHECK-NEXT: fmov d2, x2 |
3224 | | -; CHECK-NEXT: sqdmlal d2, s1, s0 |
3225 | | -; CHECK-NEXT: fmov x0, d2 |
3226 | | -; CHECK-NEXT: ret |
| 3268 | +; CHECK-SD-LABEL: sqdmlal_d: |
| 3269 | +; CHECK-SD: // %bb.0: |
| 3270 | +; CHECK-SD-NEXT: fmov s0, w1 |
| 3271 | +; CHECK-SD-NEXT: fmov s1, w0 |
| 3272 | +; CHECK-SD-NEXT: fmov d2, x2 |
| 3273 | +; CHECK-SD-NEXT: sqdmlal d2, s1, s0 |
| 3274 | +; CHECK-SD-NEXT: fmov x0, d2 |
| 3275 | +; CHECK-SD-NEXT: ret |
| 3276 | +; |
| 3277 | +; CHECK-GI-LABEL: sqdmlal_d: |
| 3278 | +; CHECK-GI: // %bb.0: |
| 3279 | +; CHECK-GI-NEXT: fmov s0, w0 |
| 3280 | +; CHECK-GI-NEXT: fmov s1, w1 |
| 3281 | +; CHECK-GI-NEXT: fmov d2, x2 |
| 3282 | +; CHECK-GI-NEXT: sqdmlal d2, s0, s1 |
| 3283 | +; CHECK-GI-NEXT: fmov x0, d2 |
| 3284 | +; CHECK-GI-NEXT: ret |
3227 | 3285 | %tmp4 = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %A, i32 %B) |
3228 | 3286 | %tmp5 = call i64 @llvm.aarch64.neon.sqadd.i64(i64 %C, i64 %tmp4) |
3229 | 3287 | ret i64 %tmp5 |
@@ -3256,14 +3314,23 @@ define i32 @sqdmlsl_s(i16 %A, i16 %B, i32 %C) nounwind { |
3256 | 3314 | } |
3257 | 3315 |
|
3258 | 3316 | define i64 @sqdmlsl_d(i32 %A, i32 %B, i64 %C) nounwind { |
3259 | | -; CHECK-LABEL: sqdmlsl_d: |
3260 | | -; CHECK: // %bb.0: |
3261 | | -; CHECK-NEXT: fmov s0, w1 |
3262 | | -; CHECK-NEXT: fmov s1, w0 |
3263 | | -; CHECK-NEXT: fmov d2, x2 |
3264 | | -; CHECK-NEXT: sqdmlsl d2, s1, s0 |
3265 | | -; CHECK-NEXT: fmov x0, d2 |
3266 | | -; CHECK-NEXT: ret |
| 3317 | +; CHECK-SD-LABEL: sqdmlsl_d: |
| 3318 | +; CHECK-SD: // %bb.0: |
| 3319 | +; CHECK-SD-NEXT: fmov s0, w1 |
| 3320 | +; CHECK-SD-NEXT: fmov s1, w0 |
| 3321 | +; CHECK-SD-NEXT: fmov d2, x2 |
| 3322 | +; CHECK-SD-NEXT: sqdmlsl d2, s1, s0 |
| 3323 | +; CHECK-SD-NEXT: fmov x0, d2 |
| 3324 | +; CHECK-SD-NEXT: ret |
| 3325 | +; |
| 3326 | +; CHECK-GI-LABEL: sqdmlsl_d: |
| 3327 | +; CHECK-GI: // %bb.0: |
| 3328 | +; CHECK-GI-NEXT: fmov s0, w0 |
| 3329 | +; CHECK-GI-NEXT: fmov s1, w1 |
| 3330 | +; CHECK-GI-NEXT: fmov d2, x2 |
| 3331 | +; CHECK-GI-NEXT: sqdmlsl d2, s0, s1 |
| 3332 | +; CHECK-GI-NEXT: fmov x0, d2 |
| 3333 | +; CHECK-GI-NEXT: ret |
3267 | 3334 | %tmp4 = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %A, i32 %B) |
3268 | 3335 | %tmp5 = call i64 @llvm.aarch64.neon.sqsub.i64(i64 %C, i64 %tmp4) |
3269 | 3336 | ret i64 %tmp5 |
|
0 commit comments