@@ -17,69 +17,58 @@ define void @arm_cmplx_dot_prod_q15(ptr noundef %pSrcA, ptr noundef %pSrcB, i32
17
17
; CHECK-NEXT: mov.w r5, #0
18
18
; CHECK-NEXT: csel r7, r6, r5, hs
19
19
; CHECK-NEXT: add.w lr, r7, #1
20
- ; CHECK-NEXT: mov r4, r5
21
- ; CHECK-NEXT: vldrh.u16 q0, [r0], #32
20
+ ; CHECK-NEXT: mov r6, r5
21
+ ; CHECK-NEXT: vldrh.u16 q1, [r0], #32
22
22
; CHECK-NEXT: movs r7, #0
23
23
; CHECK-NEXT: mov r8, r5
24
- ; CHECK-NEXT: vldrh.u16 q1, [r1], #32
25
- ; CHECK-NEXT: vmlsldava.s16 r4, r7, q0, q1
26
- ; CHECK-NEXT: vldrh.u16 q2, [r0, #-16]
27
- ; CHECK-NEXT: vmlaldavax.s16 r8, r5, q0, q1
28
- ; CHECK-NEXT: vldrh.u16 q3, [r1, #-16]
29
- ; CHECK-NEXT: vmlsldava.s16 r4, r7, q2, q3
30
24
; CHECK-NEXT: vldrh.u16 q0, [r1], #32
31
- ; CHECK-NEXT: sub.w lr, lr, #1
32
- ; CHECK-NEXT: cmp.w lr, #0
33
- ; CHECK-NEXT: vldrh.u16 q1, [r0], #32
34
- ; CHECK-NEXT: beq .LBB0_3
35
25
; CHECK-NEXT: .p2align 2
36
26
; CHECK-NEXT: .LBB0_2: @ %while.body
37
27
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
38
- ; CHECK-NEXT: vmlaldavax.s16 r8, r5, q2, q3
39
- ; CHECK-NEXT: vldrh.u16 q3, [r1, #-16]
40
- ; CHECK-NEXT: vmlsldava.s16 r4, r7, q1, q0
28
+ ; CHECK-NEXT: vmlsldava.s16 r8, r7, q1, q0
41
29
; CHECK-NEXT: vldrh.u16 q2, [r0, #-16]
42
- ; CHECK-NEXT: vmlaldavax.s16 r8, r5, q1, q0
43
- ; CHECK-NEXT: vldrh.u16 q1, [r0], #32
44
- ; CHECK-NEXT: vmlsldava.s16 r4, r7, q2, q3
30
+ ; CHECK-NEXT: vmlaldavax.s16 r6, r5, q1, q0
31
+ ; CHECK-NEXT: vldrh.u16 q1, [r1, #-16]
32
+ ; CHECK-NEXT: vmlsldava.s16 r8, r7, q2, q1
45
33
; CHECK-NEXT: vldrh.u16 q0, [r1], #32
34
+ ; CHECK-NEXT: vmlaldavax.s16 r6, r5, q2, q1
35
+ ; CHECK-NEXT: vldrh.u16 q1, [r0], #32
46
36
; CHECK-NEXT: le lr, .LBB0_2
47
- ; CHECK-NEXT: .LBB0_3:
48
- ; CHECK-NEXT: vmlaldavax.s16 r8, r5, q2, q3
49
- ; CHECK-NEXT: movs r6, #14
50
- ; CHECK-NEXT: and.w r2, r6, r2, lsl #1
51
- ; CHECK-NEXT: vmlaldavax.s16 r8, r5, q1, q0
37
+ ; CHECK-NEXT: @ %bb.3: @ %do.body
38
+ ; CHECK-NEXT: movs r4, #14
39
+ ; CHECK-NEXT: and.w r2, r4, r2, lsl #1
40
+ ; CHECK-NEXT: vmlaldavax.s16 r6, r5, q1, q0
52
41
; CHECK-NEXT: vldrh.u16 q2, [r0, #-16]
53
- ; CHECK-NEXT: vmlsldava.s16 r4, r7, q1, q0
42
+ ; CHECK-NEXT: vmlsldava.s16 r8, r7, q1, q0
54
43
; CHECK-NEXT: vldrh.u16 q0, [r1, #-16]
55
- ; CHECK-NEXT: vmlaldavax.s16 r8, r5, q2, q0
44
+ ; CHECK-NEXT: vmlaldavax.s16 r6, r5, q2, q0
56
45
; CHECK-NEXT: vctp.16 r2
57
- ; CHECK-NEXT: vmlsldava.s16 r4, r7, q2, q0
46
+ ; CHECK-NEXT: vmlsldava.s16 r8, r7, q2, q0
58
47
; CHECK-NEXT: vpst
59
48
; CHECK-NEXT: vldrht.u16 q1, [r0]
60
49
; CHECK-NEXT: cmp r2, #9
61
50
; CHECK-NEXT: vpsttt
62
51
; CHECK-NEXT: vldrht.u16 q0, [r1]
63
- ; CHECK-NEXT: vmlsldavat.s16 r4, r7, q1, q0
64
- ; CHECK-NEXT: vmlaldavaxt.s16 r8, r5, q1, q0
52
+ ; CHECK-NEXT: vmlsldavat.s16 r8, r7, q1, q0
53
+ ; CHECK-NEXT: vmlaldavaxt.s16 r6, r5, q1, q0
65
54
; CHECK-NEXT: blo .LBB0_10
66
55
; CHECK-NEXT: @ %bb.4: @ %do.body.1
67
56
; CHECK-NEXT: subs r2, #8
68
57
; CHECK-NEXT: vctp.16 r2
69
58
; CHECK-NEXT: vpstttt
70
59
; CHECK-NEXT: vldrht.u16 q0, [r0, #16]
71
60
; CHECK-NEXT: vldrht.u16 q1, [r1, #16]
72
- ; CHECK-NEXT: vmlsldavat.s16 r4, r7, q0, q1
73
- ; CHECK-NEXT: vmlaldavaxt.s16 r8, r5, q0, q1
61
+ ; CHECK-NEXT: vmlsldavat.s16 r8, r7, q0, q1
62
+ ; CHECK-NEXT: vmlaldavaxt.s16 r6, r5, q0, q1
74
63
; CHECK-NEXT: b .LBB0_10
75
64
; CHECK-NEXT: .p2align 2
76
65
; CHECK-NEXT: .LBB0_5: @ %if.else
77
- ; CHECK-NEXT: mov.w r4, #0
66
+ ; CHECK-NEXT: mov.w r8, #0
78
67
; CHECK-NEXT: cbz r2, .LBB0_9
79
68
; CHECK-NEXT: @ %bb.6: @ %while.body14.preheader
80
69
; CHECK-NEXT: lsls r6, r2, #1
81
- ; CHECK-NEXT: mov r5, r4
82
- ; CHECK-NEXT: mov r7, r4
70
+ ; CHECK-NEXT: mov r5, r8
71
+ ; CHECK-NEXT: mov r7, r8
83
72
; CHECK-NEXT: movs r2, #0
84
73
; CHECK-NEXT: dlstp.16 lr, r6
85
74
; CHECK-NEXT: .p2align 2
@@ -88,22 +77,22 @@ define void @arm_cmplx_dot_prod_q15(ptr noundef %pSrcA, ptr noundef %pSrcB, i32
88
77
; CHECK-NEXT: vldrh.u16 q0, [r0], #16
89
78
; CHECK-NEXT: vldrh.u16 q1, [r1], #16
90
79
; CHECK-NEXT: vmlsldava.s16 r2, r7, q0, q1
91
- ; CHECK-NEXT: vmlaldavax.s16 r4, r5, q0, q1
80
+ ; CHECK-NEXT: vmlaldavax.s16 r8, r5, q0, q1
92
81
; CHECK-NEXT: letp lr, .LBB0_7
93
82
; CHECK-NEXT: @ %bb.8: @ %if.end.loopexit177
94
- ; CHECK-NEXT: mov r8, r4
95
- ; CHECK-NEXT: mov r4, r2
83
+ ; CHECK-NEXT: mov r6, r8
84
+ ; CHECK-NEXT: mov r8, r2
96
85
; CHECK-NEXT: b .LBB0_10
97
86
; CHECK-NEXT: .p2align 2
98
87
; CHECK-NEXT: .LBB0_9:
99
- ; CHECK-NEXT: mov r7, r4
100
- ; CHECK-NEXT: mov.w r8, #0
101
- ; CHECK-NEXT: mov r5, r4
88
+ ; CHECK-NEXT: mov r7, r8
89
+ ; CHECK-NEXT: movs r6, #0
90
+ ; CHECK-NEXT: mov r5, r8
102
91
; CHECK-NEXT: .LBB0_10: @ %if.end
103
- ; CHECK-NEXT: asrl r4, r7, #6
104
- ; CHECK-NEXT: asrl r8, r5, #6
105
- ; CHECK-NEXT: str r4, [r3]
106
- ; CHECK-NEXT: str.w r8, [r12]
92
+ ; CHECK-NEXT: asrl r8, r7, #6
93
+ ; CHECK-NEXT: asrl r6, r5, #6
94
+ ; CHECK-NEXT: str.w r8, [r3]
95
+ ; CHECK-NEXT: str.w r6, [r12]
107
96
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc}
108
97
entry:
109
98
%cmp = icmp ugt i32 %numSamples, 15
0 commit comments