Skip to content

Commit 8d487ef

Browse files
authored
Merge pull request #5124 from XiWeiGu/LoongArch64-LA264-lapack-fixed
LoongArch64: Fixed lapack test for LA264
2 parents e8b11a1 + 2c4a5cc commit 8d487ef

File tree

9 files changed: 402 additions and 2158 deletions.

kernel/loongarch64/amax_lsx.S

Lines changed: 1 addition & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -56,17 +56,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
5656
LDINT INCX, 0(INCX)
5757
#endif
5858

59+
vxor.v VM0, VM0, VM0
5960
bge $r0, N, .L999
6061
bge $r0, INCX, .L999
6162
li.d TEMP, 1
6263
slli.d TEMP, TEMP, BASE_SHIFT
6364
slli.d INCX, INCX, BASE_SHIFT
64-
#ifdef DOUBLE
65-
vldrepl.d VM0, X, 0
66-
#else
67-
vldrepl.w VM0, X, 0
68-
#endif
69-
VFSUB VM0, VM0, VM0
7065
bne INCX, TEMP, .L20
7166

7267
srai.d I, N, 3

kernel/loongarch64/cnrm2_lsx.S

Lines changed: 51 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -47,6 +47,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
4747
#define VX4 $vr21
4848
#define res1 $vr19
4949
#define res2 $vr20
50+
#define RCP $f2
51+
#define VALPHA $vr3
5052

5153
PROLOGUE
5254

@@ -55,10 +57,30 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
5557
LDINT INCX, 0(INCX)
5658
#endif
5759

58-
vxor.v res1, res1, res1
59-
vxor.v res2, res2, res2
6060
bge $r0, N, .L999
6161
beq $r0, INCX, .L999
62+
addi.d $sp, $sp, -32
63+
st.d $ra, $sp, 0
64+
st.d N, $sp, 8
65+
st.d X, $sp, 16
66+
st.d INCX, $sp, 24
67+
#ifdef DYNAMIC_ARCH
68+
bl camax_k_LA264
69+
#else
70+
bl camax_k
71+
#endif
72+
ld.d $ra, $sp, 0
73+
ld.d N, $sp, 8
74+
ld.d X, $sp, 16
75+
ld.d INCX, $sp, 24
76+
addi.d $sp, $sp, 32
77+
78+
frecip.s RCP, $f0
79+
vreplvei.w VALPHA, $vr2, 0
80+
vxor.v res1, res1, res1
81+
vxor.v res2, res2, res2
82+
fcmp.ceq.s $fcc0, $f0, $f19
83+
bcnez $fcc0, .L999
6284
li.d TEMP, 1
6385
slli.d TEMP, TEMP, ZBASE_SHIFT
6486
slli.d INCX, INCX, ZBASE_SHIFT
@@ -69,16 +91,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
6991

7092
.L10:
7193
vld VX0, X, 0 * SIZE
72-
vfcvtl.d.s VX1, VX0
73-
vfcvth.d.s VX2, VX0
74-
vfmadd.d res1, VX1, VX1, res1
75-
vfmadd.d res2, VX2, VX2, res2
76-
vld VX0, X, 4 * SIZE
77-
vfcvtl.d.s VX3, VX0
78-
vfcvth.d.s VX4, VX0
79-
vfmadd.d res1, VX3, VX3, res1
80-
vfmadd.d res2, VX4, VX4, res2
8194
addi.d I, I, -1
95+
vld VX0, X, 0 * SIZE
96+
vld VX1, X, 4 * SIZE
97+
vfmul.s VX0, VX0, VALPHA
98+
vfmul.s VX1, VX1, VALPHA
99+
100+
vfmadd.s res1, VX0, VX0, res1
101+
vfmadd.s res2, VX1, VX1, res2
102+
82103
addi.d X, X, 8 * SIZE
83104
blt $r0, I, .L10
84105
b .L996
@@ -99,10 +120,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
99120
vinsgr2vr.w VX0, t3, 2
100121
vinsgr2vr.w VX0, t4, 3
101122
add.d X, X, INCX
102-
vfcvtl.d.s VX1, VX0
103-
vfcvth.d.s VX2, VX0
104-
vfmadd.d res1, VX1, VX1, res1
105-
vfmadd.d res2, VX2, VX2, res2
123+
vfmul.s VX0, VX0, VALPHA
124+
vfmadd.s res1, VX0, VX0, res1
125+
106126
ld.w t1, X, 0 * SIZE
107127
ld.w t2, X, 1 * SIZE
108128
add.d X, X, INCX
@@ -113,19 +133,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
113133
vinsgr2vr.w VX0, t3, 2
114134
vinsgr2vr.w VX0, t4, 3
115135
add.d X, X, INCX
116-
vfcvtl.d.s VX3, VX0
117-
vfcvth.d.s VX4, VX0
118-
vfmadd.d res1, VX3, VX3, res1
119-
vfmadd.d res2, VX4, VX4, res2
136+
vfmul.s VX0, VX0, VALPHA
137+
vfmadd.s res2, VX0, VX0, res2
138+
120139
addi.d I, I, -1
121140
blt $r0, I, .L21
122141
b .L996
123142
.align 3
124143

125144
.L996:
126-
vfadd.d res1, res1, res2
127-
vreplvei.d VX1, res1, 1
128-
vfadd.d res1, VX1, res1
145+
vfadd.s res1, res1, res2
146+
vreplvei.w VX1, res1, 1
147+
vreplvei.w VX2, res1, 2
148+
vreplvei.w VX3, res1, 3
149+
vfadd.s res1, VX1, res1
150+
vfadd.s res1, VX2, res1
151+
vfadd.s res1, VX3, res1
129152
.align 3
130153

131154
.L997:
@@ -137,18 +160,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
137160
fld.s a1, X, 0 * SIZE
138161
fld.s a2, X, 1 * SIZE
139162
addi.d I, I, -1
140-
fcvt.d.s a1, a1
141-
fcvt.d.s a2, a2
142-
fmadd.d res, a1, a1, res
143-
fmadd.d res, a2, a2, res
163+
fmul.s a1, a1, RCP
164+
fmul.s a2, a2, RCP
165+
fmadd.s res, a1, a1, res
166+
fmadd.s res, a2, a2, res
144167
add.d X, X, INCX
145168
blt $r0, I, .L998
146169
.align 3
147170

148171
.L999:
149-
fsqrt.d res, res
172+
fsqrt.s res, res
173+
fmul.s $f0, res, $f0
150174
move $r4, $r17
151-
fcvt.s.d $f0, $f19
152175
jirl $r0, $r1, 0x0
153176
.align 3
154177

kernel/loongarch64/copy_lsx.S

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -270,9 +270,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
270270
add.d Y, Y, INCY
271271
ST a2, Y, 0
272272
add.d Y, Y, INCY
273-
ST a3, X, 0
273+
ST a3, Y, 0
274274
add.d Y, Y, INCY
275-
ST a4, X, 0
275+
ST a4, Y, 0
276276
add.d Y, Y, INCY
277277
LD a1, X, 0
278278
add.d X, X, INCX
@@ -286,9 +286,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
286286
add.d Y, Y, INCY
287287
ST a2, Y, 0
288288
add.d Y, Y, INCY
289-
ST a3, X, 0
289+
ST a3, Y, 0
290290
add.d Y, Y, INCY
291-
ST a4, X, 0
291+
ST a4, Y, 0
292292
add.d Y, Y, INCY
293293
addi.d I, I, -1
294294
blt $r0, I, .L222

0 commit comments

Comments
 (0)