Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions source/backend/cpu/riscv/rvv/MNNMatrixAdd.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#include <riscv_vector.h>

void MNNMatrixAdd(float *C, const float *A, const float *B,
size_t widthC4, size_t cStride, size_t aStride,
size_t bStride, size_t height) {
size_t total = widthC4 * 4;
for (size_t y = 0; y < height; ++y) {
auto a = A + aStride * y;
auto b = B + bStride * y;
auto c = C + cStride * y;

size_t n = total;
while (n > 0) {
size_t vl = __riscv_vsetvl_e32m8(n);
vfloat32m8_t va = __riscv_vle32_v_f32m8(a, vl);
vfloat32m8_t vb = __riscv_vle32_v_f32m8(b, vl);
vfloat32m8_t vc = __riscv_vfadd_vv_f32m8(va, vb, vl);
__riscv_vse32_v_f32m8(c, vc, vl);

a += vl;
b += vl;
c += vl;
n -= vl;
}
}
}
26 changes: 26 additions & 0 deletions source/backend/cpu/riscv/rvv/MNNMatrixMax.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#include <riscv_vector.h>

// Element-wise maximum of two strided float matrices, computed with the
// RVV vfmax instruction: C = vfmax(A, B).
//
// Each of the `height` rows holds `widthC4 * 4` floats. Row y of A, B and C
// starts at A + aStride * y, B + bStride * y and C + cStride * y, so all
// strides are expressed in float elements.
// NOTE(review): the "C4" suffix presumably refers to packs of 4 floats; verify against callers.
void MNNMatrixMax(float *C, const float *A, const float *B,
                  size_t widthC4, size_t cStride, size_t aStride,
                  size_t bStride, size_t height) {
    size_t total = widthC4 * 4;
    // The row index is size_t to match `height`: a signed int counter would
    // be compared against an unsigned bound and could overflow for very
    // large heights.
    for (size_t y = 0; y < height; ++y) {
        const float *a = A + aStride * y;
        const float *b = B + bStride * y;
        float *c = C + cStride * y;

        size_t n = total;
        while (n > 0) {
            // vl is the number of elements handled in this pass.
            size_t vl = __riscv_vsetvl_e32m8(n);
            vfloat32m8_t va = __riscv_vle32_v_f32m8(a, vl);
            vfloat32m8_t vb = __riscv_vle32_v_f32m8(b, vl);
            vfloat32m8_t vc = __riscv_vfmax_vv_f32m8(va, vb, vl);
            __riscv_vse32_v_f32m8(c, vc, vl);

            a += vl;
            b += vl;
            c += vl;
            n -= vl;
        }
    }
}
26 changes: 26 additions & 0 deletions source/backend/cpu/riscv/rvv/MNNMatrixSub.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#include <riscv_vector.h>

// Element-wise subtraction of two strided float matrices: C = A - B.
//
// Each of the `height` rows holds `widthC4 * 4` floats. Row y of A, B and C
// starts at A + aStride * y, B + bStride * y and C + cStride * y, so all
// strides are expressed in float elements.
// NOTE(review): the "C4" suffix presumably refers to packs of 4 floats; verify against callers.
void MNNMatrixSub(float *C, const float *A, const float *B,
                  size_t widthC4, size_t cStride, size_t aStride,
                  size_t bStride, size_t height) {
    size_t total = widthC4 * 4;
    // The row index is size_t to match `height`: a signed int counter would
    // be compared against an unsigned bound and could overflow for very
    // large heights.
    for (size_t y = 0; y < height; ++y) {
        const float *a = A + aStride * y;
        const float *b = B + bStride * y;
        float *c = C + cStride * y;

        size_t n = total;
        while (n > 0) {
            // vl is the number of elements handled in this pass.
            size_t vl = __riscv_vsetvl_e32m8(n);
            vfloat32m8_t va = __riscv_vle32_v_f32m8(a, vl);
            vfloat32m8_t vb = __riscv_vle32_v_f32m8(b, vl);
            vfloat32m8_t vc = __riscv_vfsub_vv_f32m8(va, vb, vl);
            __riscv_vse32_v_f32m8(c, vc, vl);

            a += vl;
            b += vl;
            c += vl;
            n -= vl;
        }
    }
}
Loading