; RUN: llc < %s -march=sbf -sbf-expand-memcpy-in-order | FileCheck %s
2+
; Declaration of the memcpy intrinsic (pointer, pointer, i64 length, immediate
; i1 flag) that every test function in this file calls.
; Function Attrs: mustprogress nocallback nofree nounwind willreturn memory(argmem: readwrite)
declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #1
5+
; memcpy_test_1: 32-byte copy between two pointers declared `align 16`.
; Expected output: four 8-byte load/store pairs at offsets 0, 8, 16 and 24,
; appearing in that order.
define void @memcpy_test_1(ptr align 16 %a, ptr align 16 %b) local_unnamed_addr #0 {
entry:
  call void @llvm.memcpy.p0.p0.i64(ptr align 16 %a, ptr align 16 %b, i64 32, i1 0)

; 4 pairs of loads and stores.
; SCRATCH_REG is bound by each load and only referenced by the store that
; follows it, so the store must write the register that was just loaded.
; SRC_REG and DST_REG are bound by the first pair and referenced afterwards,
; so every pair must use the same base registers.
; CHECK-LABEL: memcpy_test_1:
; CHECK: ldxdw [[SCRATCH_REG:r[0-9]]], [[[SRC_REG:r[0-9]]] + 0]
; CHECK: stxdw [[[DST_REG:r[0-9]]] + 0], [[SCRATCH_REG]]
; CHECK: ldxdw [[SCRATCH_REG:r[0-9]]], [[[SRC_REG]] + 8]
; CHECK: stxdw [[[DST_REG]] + 8], [[SCRATCH_REG]]
; CHECK: ldxdw [[SCRATCH_REG:r[0-9]]], [[[SRC_REG]] + 16]
; CHECK: stxdw [[[DST_REG]] + 16], [[SCRATCH_REG]]
; CHECK: ldxdw [[SCRATCH_REG:r[0-9]]], [[[SRC_REG]] + 24]
; CHECK: stxdw [[[DST_REG]] + 24], [[SCRATCH_REG]]
  ret void
}
22+
; memcpy_test_2: 17-byte copy between two pointers declared `align 16`.
; Expected output: two 8-byte load/store pairs (offsets 0 and 8) followed by a
; single-byte pair at offset 16.
define void @memcpy_test_2(ptr align 16 %a, ptr align 16 %b) local_unnamed_addr #0 {
entry:
  call void @llvm.memcpy.p0.p0.i64(ptr align 16 %a, ptr align 16 %b, i64 17, i1 0)

; 2 pairs of loads and stores + 1 pair for the byte.
; SCRATCH_REG is bound by each load and referenced by the store that follows;
; SRC_REG and DST_REG are bound once and must stay the same for every pair.
; CHECK-LABEL: memcpy_test_2:
; CHECK: ldxdw [[SCRATCH_REG:r[0-9]]], [[[SRC_REG:r[0-9]]] + 0]
; CHECK: stxdw [[[DST_REG:r[0-9]]] + 0], [[SCRATCH_REG]]
; CHECK: ldxdw [[SCRATCH_REG:r[0-9]]], [[[SRC_REG]] + 8]
; CHECK: stxdw [[[DST_REG]] + 8], [[SCRATCH_REG]]
; CHECK: ldxb [[SCRATCH_REG:r[0-9]]], [[[SRC_REG]] + 16]
; CHECK: stxb [[[DST_REG]] + 16], [[SCRATCH_REG]]
  ret void
}
37+
; memcpy_test_3: 18-byte copy between two pointers declared `align 16`.
; Expected output: two 8-byte load/store pairs (offsets 0 and 8) followed by a
; 2-byte (half-word) pair at offset 16.
define void @memcpy_test_3(ptr align 16 %a, ptr align 16 %b) local_unnamed_addr #0 {
entry:
  call void @llvm.memcpy.p0.p0.i64(ptr align 16 %a, ptr align 16 %b, i64 18, i1 0)

; 2 pairs of loads and stores + 1 pair for the 2 bytes.
; SCRATCH_REG is bound by each load and referenced by the store that follows;
; SRC_REG and DST_REG are bound once and must stay the same for every pair.
; CHECK-LABEL: memcpy_test_3:
; CHECK: ldxdw [[SCRATCH_REG:r[0-9]]], [[[SRC_REG:r[0-9]]] + 0]
; CHECK: stxdw [[[DST_REG:r[0-9]]] + 0], [[SCRATCH_REG]]
; CHECK: ldxdw [[SCRATCH_REG:r[0-9]]], [[[SRC_REG]] + 8]
; CHECK: stxdw [[[DST_REG]] + 8], [[SCRATCH_REG]]
; CHECK: ldxh [[SCRATCH_REG:r[0-9]]], [[[SRC_REG]] + 16]
; CHECK: stxh [[[DST_REG]] + 16], [[SCRATCH_REG]]
  ret void
}
52+
; memcpy_test_4: 19-byte copy between two pointers declared `align 16`.
; Expected output: two 8-byte load/store pairs (offsets 0 and 8) followed by
; one 4-byte pair at offset 15. That last access ends exactly at byte 19
; (15 + 4) and therefore overlaps the final byte of the preceding 8-byte pair.
define void @memcpy_test_4(ptr align 16 %a, ptr align 16 %b) local_unnamed_addr #0 {
entry:
  call void @llvm.memcpy.p0.p0.i64(ptr align 16 %a, ptr align 16 %b, i64 19, i1 0)

; 2 pairs of loads and stores + 1 pair for the 3 bytes.
; SCRATCH_REG is bound by each load and referenced by the store that follows;
; SRC_REG and DST_REG are bound once and must stay the same for every pair.
; CHECK-LABEL: memcpy_test_4:
; CHECK: ldxdw [[SCRATCH_REG:r[0-9]]], [[[SRC_REG:r[0-9]]] + 0]
; CHECK: stxdw [[[DST_REG:r[0-9]]] + 0], [[SCRATCH_REG]]
; CHECK: ldxdw [[SCRATCH_REG:r[0-9]]], [[[SRC_REG]] + 8]
; CHECK: stxdw [[[DST_REG]] + 8], [[SCRATCH_REG]]
; CHECK: ldxw [[SCRATCH_REG:r[0-9]]], [[[SRC_REG]] + 15]
; CHECK: stxw [[[DST_REG]] + 15], [[SCRATCH_REG]]
  ret void
}
67+
; memcpy_test_5: 21-byte copy between two pointers declared `align 16`.
; Expected output: two 8-byte load/store pairs (offsets 0 and 8) followed by
; one more 8-byte pair at offset 13. That last access ends exactly at byte 21
; (13 + 8) and therefore overlaps the last three bytes of the preceding pair.
define void @memcpy_test_5(ptr align 16 %a, ptr align 16 %b) local_unnamed_addr #0 {
entry:
  call void @llvm.memcpy.p0.p0.i64(ptr align 16 %a, ptr align 16 %b, i64 21, i1 0)

; 2 pairs of loads and stores + 1 pair for the 5 bytes.
; SCRATCH_REG is bound by each load and referenced by the store that follows;
; SRC_REG and DST_REG are bound once and must stay the same for every pair.
; CHECK-LABEL: memcpy_test_5:
; CHECK: ldxdw [[SCRATCH_REG:r[0-9]]], [[[SRC_REG:r[0-9]]] + 0]
; CHECK: stxdw [[[DST_REG:r[0-9]]] + 0], [[SCRATCH_REG]]
; CHECK: ldxdw [[SCRATCH_REG:r[0-9]]], [[[SRC_REG]] + 8]
; CHECK: stxdw [[[DST_REG]] + 8], [[SCRATCH_REG]]
; CHECK: ldxdw [[SCRATCH_REG:r[0-9]]], [[[SRC_REG]] + 13]
; CHECK: stxdw [[[DST_REG]] + 13], [[SCRATCH_REG]]
  ret void
}
82+
; memcpy_test_6: 33-byte copy between two pointers declared `align 16`.
; More than 32 bytes, so no inline load/store sequence is expected: the checks
; look for the length 33 being moved into r3 and then a call to memcpy.
; CHECK: memcpy_test_6
; CHECK: mov64 r3, 33
; CHECK: call memcpy
define void @memcpy_test_6(ptr align 16 %a, ptr align 16 %b) local_unnamed_addr #0 {
entry:
  call void @llvm.memcpy.p0.p0.i64(ptr align 16 %a, ptr align 16 %b, i64 33, i1 false)
  ret void
}
0 commit comments