@@ -47,6 +47,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
47
47
#define VX4 $vr21
48
48
#define res1 $vr19
49
49
#define res2 $vr20
50
+ #define RCP $f2
51
+ #define VALPHA $vr3
50
52
51
53
PROLOGUE
52
54
@@ -55,10 +57,30 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
55
57
LDINT INCX, 0 (INCX)
56
58
#endif
57
59
58
- vxor.v res1, res1, res1
59
- vxor.v res2, res2, res2
60
60
bge $r0, N, .L999
61
61
beq $r0, INCX, .L999
62
+ addi.d $sp, $sp, -32
63
+ st.d $ra, $sp, 0
64
+ st.d N, $sp, 8
65
+ st.d X, $sp, 16
66
+ st.d INCX, $sp, 24
67
+ #ifdef DYNAMIC_ARCH
68
+ bl camax_k_LA264
69
+ #else
70
+ bl camax_k
71
+ #endif
72
+ ld.d $ra, $sp, 0
73
+ ld.d N, $sp, 8
74
+ ld.d X, $sp, 16
75
+ ld.d INCX, $sp, 24
76
+ addi.d $sp, $sp, 32
77
+
78
+ frecip.s RCP, $f0
79
+ vreplvei.w VALPHA, $vr2, 0
80
+ vxor.v res1, res1, res1
81
+ vxor.v res2, res2, res2
82
+ fcmp.ceq.s $fcc0, $f0, $f19
83
+ bcnez $fcc0, .L999
62
84
li.d TEMP, 1
63
85
slli.d TEMP, TEMP, ZBASE_SHIFT
64
86
slli.d INCX, INCX, ZBASE_SHIFT
@@ -69,16 +91,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
69
91
70
92
.L10:
71
93
vld VX0, X, 0 * SIZE
72
- vfcvtl.d.s VX1, VX0
73
- vfcvth.d.s VX2, VX0
74
- vfmadd.d res1, VX1, VX1, res1
75
- vfmadd.d res2, VX2, VX2, res2
76
- vld VX0, X, 4 * SIZE
77
- vfcvtl.d.s VX3, VX0
78
- vfcvth.d.s VX4, VX0
79
- vfmadd.d res1, VX3, VX3, res1
80
- vfmadd.d res2, VX4, VX4, res2
81
94
addi.d I, I, -1
95
+ vld VX0, X, 0 * SIZE
96
+ vld VX1, X, 4 * SIZE
97
+ vfmul.s VX0, VX0, VALPHA
98
+ vfmul.s VX1, VX1, VALPHA
99
+
100
+ vfmadd.s res1, VX0, VX0, res1
101
+ vfmadd.s res2, VX1, VX1, res2
102
+
82
103
addi.d X, X, 8 * SIZE
83
104
blt $r0, I, .L10
84
105
b .L996
@@ -99,10 +120,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
99
120
vinsgr2vr.w VX0, t3, 2
100
121
vinsgr2vr.w VX0, t4, 3
101
122
add.d X, X, INCX
102
- vfcvtl.d.s VX1, VX0
103
- vfcvth.d.s VX2, VX0
104
- vfmadd.d res1, VX1, VX1, res1
105
- vfmadd.d res2, VX2, VX2, res2
123
+ vfmul.s VX0, VX0, VALPHA
124
+ vfmadd.s res1, VX0, VX0, res1
125
+
106
126
ld.w t1, X, 0 * SIZE
107
127
ld.w t2, X, 1 * SIZE
108
128
add.d X, X, INCX
@@ -113,19 +133,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
113
133
vinsgr2vr.w VX0, t3, 2
114
134
vinsgr2vr.w VX0, t4, 3
115
135
add.d X, X, INCX
116
- vfcvtl.d.s VX3, VX0
117
- vfcvth.d.s VX4, VX0
118
- vfmadd.d res1, VX3, VX3, res1
119
- vfmadd.d res2, VX4, VX4, res2
136
+ vfmul.s VX0, VX0, VALPHA
137
+ vfmadd.s res2, VX0, VX0, res2
138
+
120
139
addi.d I, I, -1
121
140
blt $r0, I, .L21
122
141
b .L996
123
142
.align 3
124
143
125
144
.L996:
126
- vfadd.d res1, res1, res2
127
- vreplvei.d VX1, res1, 1
128
- vfadd.d res1, VX1, res1
145
+ vfadd.s res1, res1, res2
146
+ vreplvei.w VX1, res1, 1
147
+ vreplvei.w VX2, res1, 2
148
+ vreplvei.w VX3, res1, 3
149
+ vfadd.s res1, VX1, res1
150
+ vfadd.s res1, VX2, res1
151
+ vfadd.s res1, VX3, res1
129
152
.align 3
130
153
131
154
.L997:
@@ -137,18 +160,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
137
160
fld.s a1, X, 0 * SIZE
138
161
fld.s a2, X, 1 * SIZE
139
162
addi.d I, I, -1
140
- fcvt.d.s a1, a1
141
- fcvt.d.s a2, a2
142
- fmadd.d res, a1, a1, res
143
- fmadd.d res, a2, a2, res
163
+ fmul.s a1, a1, RCP
164
+ fmul.s a2, a2, RCP
165
+ fmadd.s res, a1, a1, res
166
+ fmadd.s res, a2, a2, res
144
167
add.d X, X, INCX
145
168
blt $r0, I, .L998
146
169
.align 3
147
170
148
171
.L999:
149
- fsqrt.d res, res
172
+ fsqrt.s res, res
173
+ fmul.s $f0, res, $f0
150
174
move $r4, $r17
151
- fcvt.s.d $f0, $f19
152
175
jirl $r0, $r1, 0x0
153
176
.align 3
154
177
0 commit comments