Skip to content

Commit eb130ea

Browse files
sunyuechi authored and pablodelara committed
erasure_code: R-V V ec_encode_data
Benchmark results on banana_f3:

rvv:
erasure_code_encode_warm: runtime = 3065696 usecs, bandwidth 108 MB in 3.0657 sec = 35.37 MB/s
erasure_code_decode_warm: runtime = 3001213 usecs, bandwidth 136 MB in 3.0012 sec = 45.47 MB/s

c:
erasure_code_encode_warm: runtime = 3002512 usecs, bandwidth 52 MB in 3.0025 sec = 17.34 MB/s
erasure_code_decode_warm: runtime = 3065235 usecs, bandwidth 57 MB in 3.0652 sec = 18.69 MB/s

Signed-off-by: sunyuechi <[email protected]>
1 parent c5d75f1 commit eb130ea

File tree

4 files changed

+175
-2
lines changed

4 files changed

+175
-2
lines changed

erasure_code/riscv64/Makefile.am

+2-1
Original file line numberDiff line numberDiff line change
@@ -31,4 +31,5 @@ lsrc_riscv64 += \
3131
erasure_code/riscv64/ec_multibinary_riscv64_dispatcher.c \
3232
erasure_code/riscv64/ec_multibinary_riscv64.S \
3333
erasure_code/riscv64/ec_gf_vect_mul_rvv.S \
34-
erasure_code/riscv64/ec_gf_vect_dot_prod_rvv.S
34+
erasure_code/riscv64/ec_gf_vect_dot_prod_rvv.S \
35+
erasure_code/riscv64/ec_encode_data_rvv.S
+154
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,154 @@
1+
/**********************************************************************
2+
Copyright (c) 2025 Institute of Software Chinese Academy of Sciences (ISCAS).
3+
4+
Redistribution and use in source and binary forms, with or without
5+
modification, are permitted provided that the following conditions
6+
are met:
7+
* Redistributions of source code must retain the above copyright
8+
notice, this list of conditions and the following disclaimer.
9+
* Redistributions in binary form must reproduce the above copyright
10+
notice, this list of conditions and the following disclaimer in
11+
the documentation and/or other materials provided with the
12+
distribution.
13+
* Neither the name of ISCAS nor the names of its
14+
contributors may be used to endorse or promote products derived
15+
from this software without specific prior written permission.
16+
17+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18+
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19+
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20+
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21+
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22+
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23+
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24+
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25+
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28+
**********************************************************************/
29+
#if HAVE_RVV
#include "ec_table.S"

/*
 * void ec_encode_data_rvv(int len, int srcs, int dests, unsigned char *v,
 *                         unsigned char **src, unsigned char **dest)
 *
 * For each output row l in [0, dests) and byte offset i in [0, len):
 *
 *   dest[l][i] = XOR over j in [0, srcs) of
 *                src[j][i] (*) v[(l * srcs + j) * 32 + 1]
 *
 * The product (*) is evaluated as gff_base[gflog_base[a] + gflog_base[b]]
 * and is forced to 0 when either operand is 0. gff_base and gflog_base are
 * not defined in this file; presumably they are the GF(2^8) antilog/log
 * tables supplied by ec_table.S (TODO confirm there).
 *
 * Vectorisation is across the sources: one e8/mf2 element per source j.
 *   - fast path: srcs <= VLMAX, every source fits in a single vector.
 *   - slow path: srcs >  VLMAX, sources are processed in chunks of vl and
 *                the per-lane partial XORs are accumulated in v20.
 *
 * ABI: RISC-V LP64 calling convention.
 *   a0 = len     (inner counter, reloaded from a6 for every row)
 *   a1 = srcs    (slow path: remaining sources in the current byte)
 *   a2 = dests   (rows remaining)
 *   a3 = v + 1   (cursor over the coefficients, stride 32 bytes)
 *   a4 = src     (cursor over the array of source pointers)
 *   a5 = dest    (pointer array on entry, then current row write pointer)
 *   a6 = saved len            a7 = cursor over the dest pointer array
 *   t0 = gff_base             t1 = gflog_base
 *   t2 = byte offset i        t3 = 32 (coefficient stride)
 *   t4 = saved srcs (slow)    t5 = scratch       t6 = chunk vl (slow)
 *   s2 = row base of v (slow) s3 = base of src (slow); both saved on stack
 *   v0  = active-lane mask    v8  = 16-bit log sums / reduction scalar
 *   v12 = coefficient != 0    v16..v19 = source pointers, then v16 = bytes
 *   v20 = XOR accumulator     v24 = coefficients, then their logs
 *
 * NOTE(review): srcs <= 0 is not checked. With vl == 0 the reduction does
 * not write v8, so the stored byte would be unspecified - confirm callers
 * never pass srcs <= 0.
 */
.option         arch, +v
.global         ec_encode_data_rvv
.type           ec_encode_data_rvv, %function
ec_encode_data_rvv:
  blez          a2, 3f                       // dests <= 0: nothing to do
  blez          a0, 3f                       // len   <= 0: nothing to do

  lla           t0, gff_base
  lla           t1, gflog_base
  addi          a3, a3, 1                    // coefficients live at v[k*32 + 1]
  vsetvli       zero, a1, e8, mf2, ta, ma
  vmv.v.i       v20, 0                       // clear the XOR accumulator
  li            t3, 32                       // byte stride between coefficients
  mv            a6, a0                       // backup len
  mv            a7, a5                       // backup dest (pointer array cursor)

  csrr          t5, vlenb                    // VLEN / 8
  srli          t5, t5, 1                    // VLMAX for e8/mf2 = VLEN / 16
  blt           t5, a1, slow                 // VLMAX < srcs: sources must be chunked

  // ---- fast path: all srcs sources fit in one vector ------------------
2:                                           // for (l = 0; l < dests; l++)
  li            t2, 0                        // i = 0
  vlse8.v       v24, (a3), t3                // v24[j] = v[(l*srcs + j)*32 + 1]
  vmsne.vi      v12, v24, 0                  // v12[j] = (coefficient != 0)
  vluxei8.v     v24, (t1), v24               // v24[j] = gflog_base[coefficient]
  ld            a5, (a5)                     // a5 = dest[l]

1:                                           // for (i = 0; i < len; i++)
  vsetvli       zero, zero, e8, mf2, ta, ma  // back to mask-agnostic, same vl
  vle64.v       v16, (a4)                    // v16..v19 = src[j] (64-bit pointers)
  vluxei64.v    v16, (t2), v16               // v16[j] = src[j][i]
  vmsne.vi      v0, v16, 0                   // src[j][i] != 0
  vmand.mm      v0, v0, v12                  // active iff src != 0 && coefficient != 0
  vluxei8.v     v16, (t1), v16, v0.t         // gflog_base[src[j][i]]
  vwaddu.vv     v8, v16, v24, v0.t           // 16-bit sum of the two logs
  vmv.v.i       v16, 0                       // inactive lanes contribute 0
  vsetvli       zero, zero, e8, mf2, ta, mu  // mu: keep the zeros in inactive lanes
  vluxei16.v    v16, (t0), v8, v0.t          // gff_base[log sum] = product
  vxor.vv       v20, v16, v20
  vmv.s.x       v8, zero                     // reduction seed = 0
  vredxor.vs    v8, v20, v8                  // XOR all lanes together
  vmv.x.s       t5, v8
  addi          a0, a0, -1                   // one byte of this row done
  sb            t5, (a5)                     // dest[l][i] = result
  addi          t2, t2, 1                    // i++
  vmv.v.i       v20, 0                       // reset accumulator for next byte
  addi          a5, a5, 1                    // advance write pointer
  bnez          a0, 1b

  addi          a2, a2, -1                   // one row done
  addi          a7, a7, 8                    // next entry of dest[]
  mv            a0, a6                       // restore len
  mv            a5, a7
  slli          t5, a1, 5
  add           a3, a3, t5                   // v += srcs * 32
  bnez          a2, 2b

  ret

  // ---- slow path: srcs > VLMAX, process the sources in chunks --------
slow:
  addi          sp, sp, -16
  sd            s2, 0(sp)
  sd            s3, 8(sp)
  mv            s3, a4                       // base of src[]
  mv            s2, a3                       // coefficient base for this row
  mv            t4, a1                       // backup srcs

2:                                           // for (l = 0; l < dests; l++)
  li            t2, 0                        // i = 0
  ld            a5, (a5)                     // a5 = dest[l]

1:                                           // per-chunk loop; also re-entered per byte
  vsetvli       t6, a1, e8, mf2, ta, ma      // t6 = sources handled in this chunk
  vle64.v       v16, (a4)                    // v16..v19 = src[j] (64-bit pointers)
  vluxei64.v    v16, (t2), v16               // v16[j] = src[j][i]
  vlse8.v       v24, (a3), t3                // v24[j] = coefficient for source j
  vmsne.vi      v12, v24, 0                  // coefficient != 0
  vmsne.vi      v0, v16, 0                   // src[j][i] != 0
  vmand.mm      v0, v0, v12                  // active iff src != 0 && coefficient != 0
  vluxei8.v     v16, (t1), v16, v0.t         // gflog_base[src[j][i]]
  vluxei8.v     v24, (t1), v24               // gflog_base[coefficient]
  vwaddu.vv     v8, v16, v24, v0.t           // 16-bit sum of the two logs
  vmv.v.i       v16, 0                       // inactive lanes contribute 0
  // tu: the last chunk may have a smaller vl than earlier chunks. Tail
  // elements of v20 still hold partial sums from those chunks and are read
  // by the reduction below, so they must be left undisturbed (a
  // tail-agnostic policy would allow them to be overwritten with all ones).
  vsetvli       zero, zero, e8, mf2, tu, mu
  vluxei16.v    v16, (t0), v8, v0.t          // gff_base[log sum] = product
  vxor.vv       v20, v16, v20                // accumulate per-lane partial XOR
  sub           a1, a1, t6                   // sources left for this byte
  slli          t5, t6, 5
  add           a3, a3, t5                   // v   += 32 * vl
  slli          t5, t6, 3
  add           a4, a4, t5                   // src += vl pointers (8 bytes each)
  bnez          a1, 1b                       // more chunks for this byte

  vsetvli       zero, t4, e8, mf2, ta, ma    // vl as used by the first chunk
  vmv.s.x       v8, zero                     // reduction seed = 0
  vredxor.vs    v8, v20, v8                  // XOR all lanes together
  vmv.x.s       t5, v8
  addi          a0, a0, -1                   // one byte of this row done
  sb            t5, (a5)                     // dest[l][i] = result
  addi          t2, t2, 1                    // i++
  vmv.v.i       v20, 0                       // reset accumulator for next byte
  mv            a1, t4                       // restore srcs
  mv            a3, s2                       // restore coefficient cursor
  mv            a4, s3                       // restore src cursor
  addi          a5, a5, 1                    // advance write pointer
  bnez          a0, 1b                       // for (i = 0; i < len; i++)

  addi          a2, a2, -1                   // one row done
  addi          a7, a7, 8                    // next entry of dest[]
  mv            a0, a6                       // restore len
  mv            a5, a7
  slli          t5, t4, 5
  add           a3, a3, t5                   // v += srcs * 32
  mv            s2, a3                       // new row base for the coefficients
  bnez          a2, 2b                       // for (l = 0; l < dests; l++)

  ld            s2, 0(sp)
  ld            s3, 8(sp)
  addi          sp, sp, 16

3:
  ret

#endif

erasure_code/riscv64/ec_multibinary_riscv64.S

+2-1
Original file line numberDiff line numberDiff line change
@@ -32,12 +32,13 @@
3232
#if HAVE_RVV
3333
mbin_interface gf_vect_mul
3434
mbin_interface gf_vect_dot_prod
35+
mbin_interface ec_encode_data
3536
#else
3637
mbin_interface_base gf_vect_mul gf_vect_mul_base
3738
mbin_interface_base gf_vect_dot_prod gf_vect_dot_prod_base
39+
mbin_interface_base ec_encode_data ec_encode_data_base
3840
#endif
3941

40-
mbin_interface_base ec_encode_data ec_encode_data_base
4142
mbin_interface_base ec_init_tables ec_init_tables_base
4243
mbin_interface_base ec_encode_data_update ec_encode_data_update_base
4344
mbin_interface_base gf_vect_mad gf_vect_mad_base

erasure_code/riscv64/ec_multibinary_riscv64_dispatcher.c

+17
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,12 @@ gf_vect_dot_prod_rvv(int len, int vlen, unsigned char *v, unsigned char **src, u
3737
extern void
3838
gf_vect_dot_prod_base(int len, int vlen, unsigned char *v, unsigned char **src,
3939
unsigned char *dest);
40+
/* Implementations the ec_encode_data dispatcher chooses between. */
extern void
ec_encode_data_rvv(int len, int srcs, int dests, unsigned char *v,
                   unsigned char **src, unsigned char **dest);
extern void
ec_encode_data_base(int len, int srcs, int dests, unsigned char *v,
                    unsigned char **src, unsigned char **dest);
4046

4147
DEFINE_INTERFACE_DISPATCHER(gf_vect_mul)
4248
{
@@ -59,3 +65,14 @@ DEFINE_INTERFACE_DISPATCHER(gf_vect_dot_prod)
5965
#endif
6066
return gf_vect_dot_prod_base;
6167
}
68+
69+
DEFINE_INTERFACE_DISPATCHER(ec_encode_data)
70+
{
71+
#if HAVE_RVV
72+
const unsigned long hwcap = getauxval(AT_HWCAP);
73+
if (hwcap & HWCAP_RV('V'))
74+
return ec_encode_data_rvv;
75+
else
76+
#endif
77+
return ec_encode_data_base;
78+
}

0 commit comments

Comments
 (0)