Skip to content

LoongArch64: Fixed lapack test for LA264 #5124

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 1 addition & 6 deletions kernel/loongarch64/amax_lsx.S
Original file line number Diff line number Diff line change
Expand Up @@ -56,17 +56,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
LDINT INCX, 0(INCX)
#endif

+vxor.v VM0, VM0, VM0
bge $r0, N, .L999
bge $r0, INCX, .L999
li.d TEMP, 1
slli.d TEMP, TEMP, BASE_SHIFT
slli.d INCX, INCX, BASE_SHIFT
-#ifdef DOUBLE
-vldrepl.d VM0, X, 0
-#else
-vldrepl.w VM0, X, 0
-#endif
-VFSUB VM0, VM0, VM0
bne INCX, TEMP, .L20

srai.d I, N, 3
Expand Down
79 changes: 51 additions & 28 deletions kernel/loongarch64/cnrm2_lsx.S
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define VX4 $vr21
#define res1 $vr19
#define res2 $vr20
+#define RCP $f2
+#define VALPHA $vr3

PROLOGUE

Expand All @@ -55,10 +57,30 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
LDINT INCX, 0(INCX)
#endif

-vxor.v res1, res1, res1
-vxor.v res2, res2, res2
bge $r0, N, .L999
beq $r0, INCX, .L999
+addi.d $sp, $sp, -32
+st.d $ra, $sp, 0
+st.d N, $sp, 8
+st.d X, $sp, 16
+st.d INCX, $sp, 24
+#ifdef DYNAMIC_ARCH
+bl camax_k_LA264
+#else
+bl camax_k
+#endif
+ld.d $ra, $sp, 0
+ld.d N, $sp, 8
+ld.d X, $sp, 16
+ld.d INCX, $sp, 24
+addi.d $sp, $sp, 32
+
+frecip.s RCP, $f0
+vreplvei.w VALPHA, $vr2, 0
+vxor.v res1, res1, res1
+vxor.v res2, res2, res2
+fcmp.ceq.s $fcc0, $f0, $f19
+bcnez $fcc0, .L999
li.d TEMP, 1
slli.d TEMP, TEMP, ZBASE_SHIFT
slli.d INCX, INCX, ZBASE_SHIFT
Expand All @@ -69,16 +91,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.L10:
vld VX0, X, 0 * SIZE
-vfcvtl.d.s VX1, VX0
-vfcvth.d.s VX2, VX0
-vfmadd.d res1, VX1, VX1, res1
-vfmadd.d res2, VX2, VX2, res2
-vld VX0, X, 4 * SIZE
-vfcvtl.d.s VX3, VX0
-vfcvth.d.s VX4, VX0
-vfmadd.d res1, VX3, VX3, res1
-vfmadd.d res2, VX4, VX4, res2
addi.d I, I, -1
+vld VX0, X, 0 * SIZE
+vld VX1, X, 4 * SIZE
+vfmul.s VX0, VX0, VALPHA
+vfmul.s VX1, VX1, VALPHA
+
+vfmadd.s res1, VX0, VX0, res1
+vfmadd.s res2, VX1, VX1, res2
+
addi.d X, X, 8 * SIZE
blt $r0, I, .L10
b .L996
Expand All @@ -99,10 +120,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vinsgr2vr.w VX0, t3, 2
vinsgr2vr.w VX0, t4, 3
add.d X, X, INCX
-vfcvtl.d.s VX1, VX0
-vfcvth.d.s VX2, VX0
-vfmadd.d res1, VX1, VX1, res1
-vfmadd.d res2, VX2, VX2, res2
+vfmul.s VX0, VX0, VALPHA
+vfmadd.s res1, VX0, VX0, res1
+
ld.w t1, X, 0 * SIZE
ld.w t2, X, 1 * SIZE
add.d X, X, INCX
Expand All @@ -113,19 +133,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vinsgr2vr.w VX0, t3, 2
vinsgr2vr.w VX0, t4, 3
add.d X, X, INCX
-vfcvtl.d.s VX3, VX0
-vfcvth.d.s VX4, VX0
-vfmadd.d res1, VX3, VX3, res1
-vfmadd.d res2, VX4, VX4, res2
+vfmul.s VX0, VX0, VALPHA
+vfmadd.s res2, VX0, VX0, res2
+
addi.d I, I, -1
blt $r0, I, .L21
b .L996
.align 3

.L996:
-vfadd.d res1, res1, res2
-vreplvei.d VX1, res1, 1
-vfadd.d res1, VX1, res1
+vfadd.s res1, res1, res2
+vreplvei.w VX1, res1, 1
+vreplvei.w VX2, res1, 2
+vreplvei.w VX3, res1, 3
+vfadd.s res1, VX1, res1
+vfadd.s res1, VX2, res1
+vfadd.s res1, VX3, res1
.align 3

.L997:
Expand All @@ -137,18 +160,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fld.s a1, X, 0 * SIZE
fld.s a2, X, 1 * SIZE
addi.d I, I, -1
-fcvt.d.s a1, a1
-fcvt.d.s a2, a2
-fmadd.d res, a1, a1, res
-fmadd.d res, a2, a2, res
+fmul.s a1, a1, RCP
+fmul.s a2, a2, RCP
+fmadd.s res, a1, a1, res
+fmadd.s res, a2, a2, res
add.d X, X, INCX
blt $r0, I, .L998
.align 3

.L999:
-fsqrt.d res, res
+fsqrt.s res, res
+fmul.s $f0, res, $f0
move $r4, $r17
-fcvt.s.d $f0, $f19
jirl $r0, $r1, 0x0
.align 3

Expand Down
8 changes: 4 additions & 4 deletions kernel/loongarch64/copy_lsx.S
Original file line number Diff line number Diff line change
Expand Up @@ -270,9 +270,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
add.d Y, Y, INCY
ST a2, Y, 0
add.d Y, Y, INCY
-ST a3, X, 0
+ST a3, Y, 0
add.d Y, Y, INCY
-ST a4, X, 0
+ST a4, Y, 0
add.d Y, Y, INCY
LD a1, X, 0
add.d X, X, INCX
Expand All @@ -286,9 +286,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
add.d Y, Y, INCY
ST a2, Y, 0
add.d Y, Y, INCY
-ST a3, X, 0
+ST a3, Y, 0
add.d Y, Y, INCY
-ST a4, X, 0
+ST a4, Y, 0
add.d Y, Y, INCY
addi.d I, I, -1
blt $r0, I, .L222
Expand Down
Loading
Loading